diff --git a/.gitignore b/.gitignore
index f0fe0df..2d1815e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -200,4 +200,5 @@ marimo/_lsp/
__marimo__/
# Streamlit
-.streamlit/secrets.toml
\ No newline at end of file
+.streamlit/secrets.toml
+*.out
\ No newline at end of file
diff --git a/autockt/envs/ngspice_ledro_d_fc.py b/autockt/envs/ngspice_ledro_d_fc.py
new file mode 100644
index 0000000..83fd8f6
--- /dev/null
+++ b/autockt/envs/ngspice_ledro_d_fc.py
@@ -0,0 +1,365 @@
+"""
+A new ckt environment based on a new structure of MDP
+"""
+
+import gym
+from gym import spaces
+
+import numpy as np
+import random
+import psutil
+
+from multiprocessing.dummy import Pool as ThreadPool
+from collections import OrderedDict
+import yaml
+import yaml.constructor
+import statistics
+import os
+import IPython
+import itertools
+from eval_engines.util.core import *
+import pickle
+import os
+
+from eval_engines.ngspice.TwoStageClass import *
+from eval_engines.ngspice.LEDRO_D_FC import *
+
+
+from loguru import logger
+import sys
+
+# Shared loguru line layout: timestamp | padded level | call site - message
+log_format = (
+    "{time:YYYY-MM-DD HH:mm:ss.SSS} | "
+    "{level: <8} | "
+    "{module}:{function}:{line} - "
+    "{message}"
+)
+
+# Clear default logger
+logger.remove()
+
+# Log to stdout
+logger.add(sys.stdout, format=log_format, level="DEBUG")
+
+# Log to file with rotation and retention.
+# The sink is named after this module (ngspice_ledro_d_fc).
+logger.add(
+    "logs/ngspice_ledro_d_fc.log",
+    format=log_format,
+    level="DEBUG",
+    rotation="1 day",
+    retention="7 days",
+)
+
+
+# way of ordering the way a yaml file is read
+class OrderedDictYAMLLoader(yaml.Loader):
+    """
+    PyYAML loader that builds every mapping as a ``collections.OrderedDict``,
+    keeping the key order of the document.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        # Route both plain maps and explicit ordered maps through the
+        # OrderedDict-producing constructor defined below.
+        map_constructor = type(self).construct_yaml_map
+        for tag in ("tag:yaml.org,2002:map", "tag:yaml.org,2002:omap"):
+            self.add_constructor(tag, map_constructor)
+
+    def construct_yaml_map(self, node):
+        # Generator-style constructor: hand out the (still empty) container
+        # first, then fill it in once construction resumes.
+        ordered = OrderedDict()
+        yield ordered
+        ordered.update(self.construct_mapping(node))
+
+    def construct_mapping(self, node, deep=False):
+        # Anything that is not a mapping node is rejected up front.
+        if not isinstance(node, yaml.MappingNode):
+            raise yaml.constructor.ConstructorError(
+                None,
+                None,
+                "expected a mapping node, but found %s" % node.id,
+                node.start_mark,
+            )
+        self.flatten_mapping(node)
+
+        result = OrderedDict()
+        for key_node, value_node in node.value:
+            key = self.construct_object(key_node, deep=deep)
+            result[key] = self.construct_object(value_node, deep=deep)
+        return result
+
+
+class LEDRO_D_FC(gym.Env):
+    """
+    Gym environment that sizes the LEDRO_D_FC circuit through ngspice.
+
+    The state holds one index per design parameter into a value grid built
+    from the YAML ``params`` section.  Each action shifts those indices, the
+    selected values are simulated, and the reward is a non-positive penalty
+    describing how far the simulated specs are from the target specs.
+    """
+
+    metadata = {"render.modes": ["human"]}
+
+    PERF_LOW = -1
+    PERF_HIGH = 0
+
+    # obtains yaml file (resolved against the working directory at class creation)
+    path = os.getcwd()
+    CIR_YAML = path + "/eval_engines/ngspice/ngspice_inputs/yaml_files/ledro_d_fc.yaml"
+
+    def __init__(self, env_config):
+        """
+        Build the environment from ``env_config``.
+
+        Recognised keys (all optional):
+          - ``multi_goal`` (default False): pick a random goal on every reset.
+          - ``generalize`` (default False): load goals from the pickled spec
+            file instead of the YAML ``target_specs``.
+          - ``num_valid`` (default 50): only used in the saved-spec file name.
+          - ``save_specs`` (default False): pickle the loaded specs to disk.
+          - ``run_valid`` (default False): with ``generalize``, walk through
+            the goals in order instead of sampling them.
+        """
+        self.multi_goal = env_config.get("multi_goal", False)
+        self.generalize = env_config.get("generalize", False)
+        num_valid = env_config.get("num_valid", 50)
+        self.specs_save = env_config.get("save_specs", False)
+        self.valid = env_config.get("run_valid", False)
+
+        self.env_steps = 0
+        # NOTE(review): OrderedDictYAMLLoader derives from the full yaml.Loader;
+        # CIR_YAML must only ever point at a trusted file.
+        with open(LEDRO_D_FC.CIR_YAML, "r") as f:
+            yaml_data = yaml.load(f, OrderedDictYAMLLoader)
+
+        # design specs
+        if not self.generalize:
+            specs = yaml_data["target_specs"]
+        else:
+            load_specs_path = (
+                LEDRO_D_FC.path + "/autockt/gen_specs/ngspice_specs_gen_ledro_d_fc"
+            )
+            # NOTE(review): pickle.load can execute code from the file; the
+            # generated spec file must be trusted.
+            with open(load_specs_path, "rb") as f:
+                specs = pickle.load(f)
+
+        # Specs are kept sorted by name; reward() pairs them with simulated
+        # values by position.
+        self.specs = OrderedDict(sorted(specs.items(), key=lambda k: k[0]))
+        if self.specs_save:
+            with open(
+                "specs_" + str(num_valid) + str(random.randint(1, 100000)), "wb"
+            ) as f:
+                pickle.dump(self.specs, f)
+
+        self.specs_ideal = []
+        self.specs_id = list(self.specs.keys())
+        self.fixed_goal_idx = -1
+        self.num_os = len(list(self.specs.values())[0])
+
+        # param array: every YAML entry is passed to np.linspace as
+        # (start, stop, num)
+        params = yaml_data["params"]
+        self.params_id = list(params.keys())
+        self.params = [
+            np.linspace(value[0], value[1], value[2]) for value in params.values()
+        ]
+
+        # initialize sim environment
+        self.sim_env = LEDRO_D_FC_Class(
+            yaml_path=LEDRO_D_FC.CIR_YAML, num_process=1, path=LEDRO_D_FC.path
+        )
+        # Index delta applied to a parameter for each discrete action value.
+        self.action_meaning = [-1, 0, 2]
+        self.action_space = spaces.Tuple(
+            [spaces.Discrete(len(self.action_meaning))] * len(self.params_id)
+        )
+        # NOTE(review): the parameter part of the box has low == high == 1,
+        # while observations carry raw indices — confirm the bounds.
+        self.observation_space = spaces.Box(
+            low=np.array(
+                [LEDRO_D_FC.PERF_LOW] * 2 * len(self.specs_id)
+                + len(self.params_id) * [1]
+            ),
+            high=np.array(
+                [LEDRO_D_FC.PERF_HIGH] * 2 * len(self.specs_id)
+                + len(self.params_id) * [1]
+            ),
+        )
+
+        # initialize current param/spec observations
+        self.cur_specs = np.zeros(len(self.specs_id), dtype=np.float32)
+        self.cur_params_idx = np.zeros(len(self.params_id), dtype=np.int32)
+
+        # g*: the goal formed by the last entry of every spec list
+        self.g_star = np.array(
+            [float(spec[self.fixed_goal_idx]) for spec in self.specs.values()]
+        )
+        # constants that lookup() normalises against
+        self.global_g = np.array(yaml_data["normalize"])
+
+        # objective number (used for validation)
+        self.obj_idx = 0
+
+    def _goal_at(self, idx):
+        """Return the goal vector made of entry ``idx`` of every spec list."""
+        return np.array([spec[idx] for spec in self.specs.values()])
+
+    def reset(self):
+        """
+        Choose the goal for the next episode, simulate the fixed starting
+        design and return the initial observation.
+
+        :return: concatenation of normalised current specs, normalised goal
+            specs and the current parameter indices
+        """
+        if self.generalize:
+            if self.valid:
+                # validation: walk through the stored goals in order, wrapping
+                if self.obj_idx > self.num_os - 1:
+                    self.obj_idx = 0
+                idx = self.obj_idx
+                self.obj_idx += 1
+            else:
+                idx = random.randint(0, self.num_os - 1)
+            self.specs_ideal = self._goal_at(idx)
+        elif not self.multi_goal:
+            self.specs_ideal = self.g_star
+        else:
+            self.specs_ideal = self._goal_at(random.randint(0, self.num_os - 1))
+
+        # normalizes the goal against global_g
+        self.specs_ideal_norm = self.lookup(self.specs_ideal, self.global_g)
+
+        # fixed starting point: 6 + 6 + 5 = 17 parameter indices
+        self.cur_params_idx = np.array([2] * 6 + [200] * 6 + [10] * 5)
+
+        self.cur_specs = self.update(self.cur_params_idx)
+        cur_spec_norm = self.lookup(self.cur_specs, self.global_g)
+
+        # observation is a combination of current specs distance from ideal,
+        # ideal spec, and current param vals
+        self.ob = np.concatenate(
+            [cur_spec_norm, self.specs_ideal_norm, self.cur_params_idx]
+        )
+        return self.ob
+
+    def step(self, action):
+        """
+        Apply one action and simulate the resulting design.
+
+        :param action: one entry per parameter, each an index into
+            ``self.action_meaning``
+        :return: ``(observation, reward, done, info)``
+        """
+        # Take action that RL agent returns to change current params
+        action = list(np.reshape(np.array(action), (np.array(action).shape[0],)))
+        self.cur_params_idx = self.cur_params_idx + np.array(
+            [self.action_meaning[a] for a in action]
+        )
+
+        # keep every index inside its parameter grid
+        self.cur_params_idx = np.clip(
+            self.cur_params_idx,
+            [0] * len(self.params_id),
+            [(len(param_vec) - 1) for param_vec in self.params],
+        )
+
+        # Get current specs and normalize
+        self.cur_specs = self.update(self.cur_params_idx)
+        cur_spec_norm = self.lookup(self.cur_specs, self.global_g)
+        reward = self.reward(self.cur_specs, self.specs_ideal)
+        done = False
+
+        # NOTE(review): reward() never returns a positive value, so this
+        # branch cannot trigger as written — confirm the intended
+        # termination criterion.
+        if reward >= 10:
+            done = True
+            print("-" * 10)
+            print("params = ", self.cur_params_idx)
+            print("specs:", self.cur_specs)
+            print("ideal specs:", self.specs_ideal)
+            print("re:", reward)
+            print("-" * 10)
+
+        self.ob = np.concatenate(
+            [cur_spec_norm, self.specs_ideal_norm, self.cur_params_idx]
+        )
+        self.env_steps = self.env_steps + 1
+        print("***cur params idx:", self.cur_params_idx, "specs: ", self.cur_specs, " reward: ", reward)
+
+        return self.ob, reward, done, {}
+
+    def lookup(self, spec, goal_spec):
+        """
+        Return ``(spec - goal_spec) / (goal_spec + spec)`` element-wise.
+
+        ``spec`` is expected to be a numpy array; ``goal_spec`` may be any
+        iterable of numbers.
+        """
+        goal_spec = [float(e) for e in goal_spec]
+        norm_spec = (spec - goal_spec) / (goal_spec + spec)
+        return norm_spec
+
+    def reward(self, spec, goal_spec):
+        """
+        Penalty-style reward: 0 when every spec is met, negative otherwise.
+
+        Overshooting a spec is not penalised.  Per spec name:
+          - ``ibias_max``: penalised when the value is above the goal.
+          - ``gain_min``: penalised (weight 3) when the value is below the goal.
+          - ``ugbw_min`` / ``phm_min``: penalised when below the goal.
+
+        :return: minus the accumulated penalty
+        """
+        norm_specs = self.lookup(spec, goal_spec)
+
+        # pay attention to reward calculation, this is not quite the reward
+        # function in RL but rather a penalty value for the optimization process
+        penalty = 0
+        for i, rel_spec in enumerate(norm_specs):
+            spec_name = self.specs_id[i]
+            assert spec_name in ["ibias_max", "gain_min", "ugbw_min", "phm_min"]
+            if spec_name == "ibias_max" and rel_spec > 0:
+                penalty += np.abs(rel_spec)
+            elif spec_name == "gain_min" and rel_spec < 0:
+                penalty += 3 * np.abs(rel_spec)
+            elif spec_name != "ibias_max" and rel_spec < 0:
+                penalty += np.abs(rel_spec)
+        return -penalty
+
+    def update(self, params_idx):
+        """
+        Simulate the design selected by ``params_idx``.
+
+        :param params_idx: one grid index per parameter, ordered like
+            ``self.params_id``
+        :return: array of the simulated spec values, ordered by spec name
+        """
+        params = [vec[params_idx[i]] for i, vec in enumerate(self.params)]
+        param_val = OrderedDict(zip(self.params_id, params))
+
+        # run param vals and simulate; element [1] of the result is the
+        # mapping of spec name -> value
+        sim_specs = self.sim_env.create_design_and_simulate(param_val)[1]
+        cur_specs = OrderedDict(sorted(sim_specs.items(), key=lambda k: k[0]))
+        return np.array(list(cur_specs.values()))
+
+
+def main():
+    """Smoke-test the environment: reset, take one step, open an IPython shell."""
+    # "run_valid" is the key LEDRO_D_FC.__init__ reads; a "valid" key is ignored.
+    env_config = {"generalize": True, "run_valid": True}
+    env = LEDRO_D_FC(env_config)
+    env.reset()
+    # one discrete action per entry of the 17-element index vector set in reset()
+    env.step([2] * 17)
+
+    IPython.embed()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/autockt/envs/ngspice_ledro_d_fc45.py b/autockt/envs/ngspice_ledro_d_fc45.py
new file mode 100644
index 0000000..213408b
--- /dev/null
+++ b/autockt/envs/ngspice_ledro_d_fc45.py
@@ -0,0 +1,496 @@
+"""
+A new ckt environment based on a new structure of MDP
+"""
+
+import gym
+from gym import spaces
+
+import numpy as np
+import random
+import psutil
+
+from multiprocessing.dummy import Pool as ThreadPool
+from collections import OrderedDict
+import yaml
+import yaml.constructor
+import statistics
+import os
+import IPython
+import itertools
+from eval_engines.util.core import *
+import pickle
+import os
+
+from eval_engines.ngspice.TwoStageClass import *
+from eval_engines.ngspice.LEDRO_D_FC import *
+from eval_engines.ngspice.LEDRO_D_FC45 import *
+
+
+from loguru import logger
+import sys
+
+from torch.utils.tensorboard import SummaryWriter
+import numpy as np
+
+# Writer will output to ./runs/ directory by default
+writer = SummaryWriter()
+
+
+# Shared loguru line layout: timestamp | padded level | call site - message
+log_format = (
+    "{time:YYYY-MM-DD HH:mm:ss.SSS} | "
+    "{level: <8} | "
+    "{module}:{function}:{line} - "
+    "{message}"
+)
+
+# Clear default logger
+logger.remove()
+
+# Log to stdout
+logger.add(sys.stdout, format=log_format, level="DEBUG")
+
+# Log to file with rotation and retention.
+# The sink is named after this module (ngspice_ledro_d_fc45).
+logger.add(
+    "logs/ngspice_ledro_d_fc45.log",
+    format=log_format,
+    level="DEBUG",
+    rotation="1 day",
+    retention="7 days",
+)
+
+class ActionNormalizer():
+    """Affine map between the normalised range [-1, 1] and [low, high]."""
+
+    def __init__(self, action_space_low, action_space_high):
+        self.action_space_low = action_space_low
+        self.action_space_high = action_space_high
+
+    def action(self, action: np.ndarray) -> np.ndarray:
+        """Change the range (-1, 1) to (low, high), clipping to the bounds."""
+        lo, hi = self.action_space_low, self.action_space_high
+        half_span = (hi - lo) / 2
+        centre = hi - half_span
+        return np.clip(action * half_span + centre, lo, hi)
+
+    def reverse_action(self, action: np.ndarray) -> np.ndarray:
+        """Change the range (low, high) to (-1, 1), clipping to the bounds."""
+        lo, hi = self.action_space_low, self.action_space_high
+        half_span = (hi - lo) / 2
+        centre = hi - half_span
+        return np.clip((action - centre) / half_span, -1.0, 1.0)
+
+# way of ordering the way a yaml file is read
+class OrderedDictYAMLLoader(yaml.Loader):
+    """
+    PyYAML loader that builds every mapping as a ``collections.OrderedDict``,
+    keeping the key order of the document.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        # Route both plain maps and explicit ordered maps through the
+        # OrderedDict-producing constructor defined below.
+        map_constructor = type(self).construct_yaml_map
+        for tag in ("tag:yaml.org,2002:map", "tag:yaml.org,2002:omap"):
+            self.add_constructor(tag, map_constructor)
+
+    def construct_yaml_map(self, node):
+        # Generator-style constructor: hand out the (still empty) container
+        # first, then fill it in once construction resumes.
+        ordered = OrderedDict()
+        yield ordered
+        ordered.update(self.construct_mapping(node))
+
+    def construct_mapping(self, node, deep=False):
+        # Anything that is not a mapping node is rejected up front.
+        if not isinstance(node, yaml.MappingNode):
+            raise yaml.constructor.ConstructorError(
+                None,
+                None,
+                "expected a mapping node, but found %s" % node.id,
+                node.start_mark,
+            )
+        self.flatten_mapping(node)
+
+        result = OrderedDict()
+        for key_node, value_node in node.value:
+            key = self.construct_object(key_node, deep=deep)
+            result[key] = self.construct_object(value_node, deep=deep)
+        return result
+
+
+class LEDRO_D_FC45(gym.Env):
+    """
+    Gym environment that sizes the LEDRO_D_FC45 circuit through ngspice.
+
+    Each action is a vector in [-1, 1] that is rescaled to physical parameter
+    values and simulated directly.  The reward is a non-positive penalty
+    describing how far the simulated specs are from the target specs.
+    """
+
+    metadata = {"render.modes": ["human"]}
+
+    PERF_LOW = -1
+    PERF_HIGH = 0
+
+    # obtains yaml file (resolved against the working directory at class creation)
+    path = os.getcwd()
+    CIR_YAML = (
+        path + "/eval_engines/ngspice/ngspice_inputs/yaml_files/ledro_d_fc45.yaml"
+    )
+
+    # Simulator parameter names, in the order used by the action vector.
+    PARAM_NAMES = (
+        "wp1", "lp1", "mp1",
+        "wp2", "lp2", "mp2",
+        "wp3", "lp3", "mp3",
+        "wp4", "lp4", "mp4",
+        "wp5", "lp5", "mp5",
+        "wp6", "lp6", "mp6",
+        "vbiasp1",
+        "vbiasp2",
+        "vbiasn0",
+        "vbiasn1",
+        "vbiasn2",
+        "cl",
+        "cc",
+    )
+
+    # Positions of the action vector that are truncated to whole numbers:
+    # the six m* entries and the last two entries (cc, cl).
+    INTEGER_PARAM_IDX = (2, 5, 8, 11, 14, 17, -1, -2)
+
+    # TensorBoard tag written for each spec name.
+    SPEC_TAGS = {
+        "gain_min": "gain",
+        "ugbw_min": "ugbw",
+        "phm_min": "pm",
+        "ibias_max": "power",
+    }
+
+    def __init__(self, env_config):
+        """
+        Build the environment from ``env_config``.
+
+        Recognised keys (all optional):
+          - ``multi_goal`` (default False): pick a random goal on every reset.
+          - ``generalize`` (default False): load goals from the pickled spec
+            file instead of the YAML ``target_specs``.
+          - ``num_valid`` (default 50): only used in the saved-spec file name.
+          - ``save_specs`` (default False): pickle the loaded specs to disk.
+          - ``run_valid`` (default False): with ``generalize``, walk through
+            the goals in order instead of sampling them.
+        """
+        self.multi_goal = env_config.get("multi_goal", False)
+        self.generalize = env_config.get("generalize", False)
+        num_valid = env_config.get("num_valid", 50)
+        self.specs_save = env_config.get("save_specs", False)
+        self.valid = env_config.get("run_valid", False)
+
+        self.env_steps = 0
+        # NOTE(review): OrderedDictYAMLLoader derives from the full yaml.Loader;
+        # CIR_YAML must only ever point at a trusted file.
+        with open(LEDRO_D_FC45.CIR_YAML, "r") as f:
+            yaml_data = yaml.load(f, OrderedDictYAMLLoader)
+
+        # design specs
+        if not self.generalize:
+            specs = yaml_data["target_specs"]
+        else:
+            load_specs_path = (
+                LEDRO_D_FC45.path + "/autockt/gen_specs/ngspice_specs_gen_ledro_d_fc45"
+            )
+            # NOTE(review): pickle.load can execute code from the file; the
+            # generated spec file must be trusted.
+            with open(load_specs_path, "rb") as f:
+                specs = pickle.load(f)
+
+        # Specs are kept sorted by name; reward() and the TensorBoard logging
+        # in step() pair them with simulated values by position.
+        self.specs = OrderedDict(sorted(specs.items(), key=lambda k: k[0]))
+        if self.specs_save:
+            with open(
+                "specs_" + str(num_valid) + str(random.randint(1, 100000)), "wb"
+            ) as f:
+                pickle.dump(self.specs, f)
+
+        self.specs_ideal = []
+        self.specs_id = list(self.specs.keys())
+        self.fixed_goal_idx = -1
+        self.num_os = len(list(self.specs.values())[0])
+
+        # param array: every YAML entry is passed to np.linspace as
+        # (start, stop, num)
+        params = yaml_data["params"]
+        self.params_id = list(params.keys())
+        self.params = [
+            np.linspace(value[0], value[1], value[2]) for value in params.values()
+        ]
+
+        # initialize sim environment
+        self.sim_env = LEDRO_D_FC45_Class(
+            yaml_path=LEDRO_D_FC45.CIR_YAML, num_process=1, path=LEDRO_D_FC45.path
+        )
+
+        # One normalised action entry per simulator parameter (25 in total).
+        self.action_space = spaces.Box(
+            low=-1, high=1, shape=(len(self.PARAM_NAMES),), dtype=np.float64
+        )
+
+        # Physical bounds, ordered like PARAM_NAMES: six (w, l, m) triples,
+        # five bias entries, then cl and cc.
+        # L: Rationale: start at ~2× technology minimum to reduce
+        # short-channel effects and improve matching.
+        action_space_low = np.array([0.12, 90, 1] * 6 + [0.1] * 5 + [1, 1])
+        action_space_high = np.array([200, 2000, 100] * 6 + [1.2] * 5 + [50, 50])
+        self.action_normalizer = ActionNormalizer(
+            action_space_low=action_space_low, action_space_high=action_space_high
+        )
+
+        # NOTE(review): the parameter part of the box is sized from the YAML
+        # params and has low == high == 1, while observations carry the
+        # physical parameter values — confirm the bounds and the length.
+        self.observation_space = spaces.Box(
+            low=np.array(
+                [LEDRO_D_FC45.PERF_LOW] * 2 * len(self.specs_id)
+                + len(self.params_id) * [1]
+            ),
+            high=np.array(
+                [LEDRO_D_FC45.PERF_HIGH] * 2 * len(self.specs_id)
+                + len(self.params_id) * [1]
+            ),
+        )
+
+        # initialize current param/spec observations
+        self.cur_specs = np.zeros(len(self.specs_id), dtype=np.float32)
+        self.cur_params_idx = np.zeros(len(self.params_id), dtype=np.int32)
+
+        # g*: the goal formed by the last entry of every spec list
+        self.g_star = np.array(
+            [float(spec[self.fixed_goal_idx]) for spec in self.specs.values()]
+        )
+        # constants that lookup() normalises against
+        self.global_g = np.array(yaml_data["normalize"])
+
+        # objective number (used for validation)
+        self.obj_idx = 0
+
+    def _goal_at(self, idx):
+        """Return the goal vector made of entry ``idx`` of every spec list."""
+        return np.array([spec[idx] for spec in self.specs.values()])
+
+    def reset(self):
+        """
+        Choose the goal for the next episode, simulate the fixed starting
+        design and return the initial observation.
+
+        :return: concatenation of normalised current specs, normalised goal
+            specs and the current parameter values
+        """
+        if self.generalize:
+            if self.valid:
+                # validation: walk through the stored goals in order, wrapping
+                if self.obj_idx > self.num_os - 1:
+                    self.obj_idx = 0
+                idx = self.obj_idx
+                self.obj_idx += 1
+            else:
+                idx = random.randint(0, self.num_os - 1)
+            self.specs_ideal = self._goal_at(idx)
+        elif not self.multi_goal:
+            self.specs_ideal = self.g_star
+        else:
+            self.specs_ideal = self._goal_at(random.randint(0, self.num_os - 1))
+
+        # normalizes the goal against global_g
+        self.specs_ideal_norm = self.lookup(self.specs_ideal, self.global_g)
+
+        # Fixed starting design, one physical value per PARAM_NAMES entry.
+        # Despite its name, cur_params_idx holds values here, not grid indices.
+        self.cur_params_idx = np.array([
+            193.9020858253666, 1318.8789684310007, 66,
+            106.3710338395701, 517.342182701802, 69,
+            51.58973768557556, 1148.0132888755084, 68,
+            62.67064928753026, 1360.398953352503, 18,
+            73.15862009109797, 1718.7658807907076, 61,
+            192.34328350363728, 1205.9109268448633, 27,
+            0.7761091728857539, 1.1431755589519739, 0.6157432007527375,
+            1.1177122194734337, 0.9243351863878987,
+            24, 9,
+        ])
+
+        self.cur_specs = self.update(self.cur_params_idx)
+        cur_spec_norm = self.lookup(self.cur_specs, self.global_g)
+
+        # observation is a combination of current specs distance from ideal,
+        # ideal spec, and current param vals
+        self.ob = np.concatenate(
+            [cur_spec_norm, self.specs_ideal_norm, self.cur_params_idx]
+        )
+        return self.ob
+
+    def step(self, action):
+        """
+        Simulate the design described by ``action``.
+
+        :param action: vector with one entry per parameter, each in [-1, 1];
+            it is rescaled to the physical ranges configured in ``__init__``
+        :return: ``(observation, reward, done, info)``
+        """
+        # convert the [-1, 1] range back to the physical range
+        action = self.action_normalizer.action(action)
+
+        # int() truncates the value; the array itself stays float-typed
+        for idx in self.INTEGER_PARAM_IDX:
+            action[idx] = int(action[idx])
+
+        self.cur_params_idx = action
+
+        # Get current specs and normalize
+        self.cur_specs = self.update(self.cur_params_idx)
+        cur_spec_norm = self.lookup(self.cur_specs, self.global_g)
+        reward = self.reward(self.cur_specs, self.specs_ideal)
+        done = False
+
+        # NOTE(review): reward() never returns a positive value, so this
+        # branch cannot trigger as written — confirm the intended
+        # termination criterion.
+        if reward >= 10:
+            done = True
+            print("-" * 10)
+            print("params = ", self.cur_params_idx)
+            print("specs:", self.cur_specs)
+            print("ideal specs:", self.specs_ideal)
+            print("re:", reward)
+            print("-" * 10)
+
+        self.ob = np.concatenate(
+            [cur_spec_norm, self.specs_ideal_norm, self.cur_params_idx]
+        )
+        self.env_steps = self.env_steps + 1
+
+        logger.info("current specs:" + str(self.cur_specs) + ", reward: " + str(reward))
+        # cur_specs is ordered by spec name, exactly like specs_id (reward()
+        # relies on the same pairing), so tag each value through its name
+        # rather than through a fixed position.
+        for spec_name, spec_value in zip(self.specs_id, self.cur_specs):
+            tag = self.SPEC_TAGS.get(spec_name, spec_name)
+            writer.add_scalar(tag, spec_value, self.env_steps)
+
+        return self.ob, reward, done, {}
+
+    def lookup(self, spec, goal_spec):
+        """
+        Return ``(spec - goal_spec) / (goal_spec + spec)`` element-wise.
+
+        ``spec`` is expected to be a numpy array; ``goal_spec`` may be any
+        iterable of numbers.
+        """
+        goal_spec = [float(e) for e in goal_spec]
+        norm_spec = (spec - goal_spec) / (goal_spec + spec)
+        return norm_spec
+
+    def reward(self, spec, goal_spec):
+        """
+        Penalty-style reward: 0 when every spec is met, negative otherwise.
+
+        Overshooting a spec is not penalised.  Per spec name:
+          - ``ibias_max``: penalised when the value is above the goal.
+          - ``gain_min``: penalised (weight 3) when the value is below the goal.
+          - ``ugbw_min`` / ``phm_min``: penalised when below the goal.
+
+        :return: minus the accumulated penalty
+        """
+        norm_specs = self.lookup(spec, goal_spec)
+
+        # pay attention to reward calculation, this is not quite the reward
+        # function in RL but rather a penalty value for the optimization process
+        penalty = 0
+        for i, rel_spec in enumerate(norm_specs):
+            spec_name = self.specs_id[i]
+            assert spec_name in ["ibias_max", "gain_min", "ugbw_min", "phm_min"]
+            if spec_name == "ibias_max" and rel_spec > 0:
+                penalty += np.abs(rel_spec)
+            elif spec_name == "gain_min" and rel_spec < 0:
+                penalty += 3 * np.abs(rel_spec)
+            elif spec_name != "ibias_max" and rel_spec < 0:
+                penalty += np.abs(rel_spec)
+        return -penalty
+
+    def update(self, params_idx):
+        """
+        Simulate the design given by ``params_idx``.
+
+        :param params_idx: physical parameter values ordered like
+            ``PARAM_NAMES`` (the name is historical; these are not indices)
+        :return: array of the simulated spec values, ordered by spec name
+        """
+        param_val = OrderedDict(zip(self.PARAM_NAMES, params_idx))
+
+        # run param vals and simulate; element [1] of the result is the
+        # mapping of spec name -> value
+        sim_specs = self.sim_env.create_design_and_simulate(param_val)[1]
+        cur_specs = OrderedDict(sorted(sim_specs.items(), key=lambda k: k[0]))
+        return np.array(list(cur_specs.values()))
+
+
+def main():
+    """Smoke-test the environment: reset, take one step, open an IPython shell."""
+    # "run_valid" is the key LEDRO_D_FC45.__init__ reads; a "valid" key is ignored.
+    env_config = {"generalize": True, "run_valid": True}
+    env = LEDRO_D_FC45(env_config)
+    env.reset()
+    # The action must have one entry per dimension of the action space; an
+    # all-zero action maps to the midpoint of every physical range.
+    env.step(np.zeros(env.action_space.shape[0]))
+
+    IPython.embed()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/autockt/envs/ngspice_zhenxin_s_fc.py b/autockt/envs/ngspice_zhenxin_s_fc.py
new file mode 100644
index 0000000..aee5c71
--- /dev/null
+++ b/autockt/envs/ngspice_zhenxin_s_fc.py
@@ -0,0 +1,611 @@
+"""
+A new ckt environment based on a new structure of MDP
+"""
+
+import gym
+from gym import spaces
+
+import numpy as np
+import random
+import psutil
+
+from multiprocessing.dummy import Pool as ThreadPool
+from collections import OrderedDict
+import yaml
+import yaml.constructor
+import statistics
+import os
+import IPython
+import itertools
+from eval_engines.util.core import *
+import pickle
+import os
+
+from eval_engines.ngspice.TwoStageClass import *
+
+# ADD_CIRCUIT
+# tip: comment un-used classes to quickly grasp errors
+# from eval_engines.ngspice.LEDRO_D_FC45 import *
+# from eval_engines.ngspice.LEDRO_D_FC import *
+from eval_engines.ngspice.Zhenxin_S_FC import *
+import datetime
+
+
+from loguru import logger
+import sys
+
+from torch.utils.tensorboard import SummaryWriter
+import numpy as np
+
+# Writer will output to ./runs/ directory by default
+
+# Timestamp string captured once at import time; in the visible part of this
+# module only the commented-out SummaryWriter below refers to it.
+date_time_obj = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+# writer = SummaryWriter(date_time_obj)
+
+
+# Shared loguru line layout: timestamp | padded level | call site - message
+log_format = (
+    "{time:YYYY-MM-DD HH:mm:ss.SSS} | "
+    "{level: <8} | "
+    "{module}:{function}:{line} - "
+    "{message}"
+)
+
+# Clear default logger
+logger.remove()
+
+# Log to stdout
+logger.add(sys.stdout, format=log_format, level="DEBUG")
+
+# Log to file with rotation and retention
+# NOTE(review): the sink is named after the ledro_d_fc circuit although this
+# module is ngspice_zhenxin_s_fc — confirm whether sharing that file is intended.
+logger.add(
+    "logs/ngspice_ledro_d_fc.log",
+    format=log_format,
+    level="DEBUG",
+    rotation="1 day",
+    retention="7 days",
+)
+
+
+class ActionNormalizer:
+    """Affine map between the normalised range [-1, 1] and [low, high]."""
+
+    def __init__(self, action_space_low, action_space_high):
+        self.action_space_low = action_space_low
+        self.action_space_high = action_space_high
+
+    def action(self, action: np.ndarray) -> np.ndarray:
+        """Change the range (-1, 1) to (low, high), clipping to the bounds."""
+        lo, hi = self.action_space_low, self.action_space_high
+        half_span = (hi - lo) / 2
+        centre = hi - half_span
+        return np.clip(action * half_span + centre, lo, hi)
+
+    def reverse_action(self, action: np.ndarray) -> np.ndarray:
+        """Change the range (low, high) to (-1, 1), clipping to the bounds."""
+        lo, hi = self.action_space_low, self.action_space_high
+        half_span = (hi - lo) / 2
+        centre = hi - half_span
+        return np.clip((action - centre) / half_span, -1.0, 1.0)
+
+
+# way of ordering the way a yaml file is read
+class OrderedDictYAMLLoader(yaml.Loader):
+    """
+    PyYAML loader that builds every mapping as a ``collections.OrderedDict``,
+    keeping the key order of the document.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        # Route both plain maps and explicit ordered maps through the
+        # OrderedDict-producing constructor defined below.
+        map_constructor = type(self).construct_yaml_map
+        for tag in ("tag:yaml.org,2002:map", "tag:yaml.org,2002:omap"):
+            self.add_constructor(tag, map_constructor)
+
+    def construct_yaml_map(self, node):
+        # Generator-style constructor: hand out the (still empty) container
+        # first, then fill it in once construction resumes.
+        ordered = OrderedDict()
+        yield ordered
+        ordered.update(self.construct_mapping(node))
+
+    def construct_mapping(self, node, deep=False):
+        # Anything that is not a mapping node is rejected up front.
+        if not isinstance(node, yaml.MappingNode):
+            raise yaml.constructor.ConstructorError(
+                None,
+                None,
+                "expected a mapping node, but found %s" % node.id,
+                node.start_mark,
+            )
+        self.flatten_mapping(node)
+
+        result = OrderedDict()
+        for key_node, value_node in node.value:
+            key = self.construct_object(key_node, deep=deep)
+            result[key] = self.construct_object(value_node, deep=deep)
+        return result
+
+
+# ADD_CIRCUIT
+class Zhenxin_S_FC(gym.Env):
+ metadata = {"render.modes": ["human"]}
+
+ PERF_LOW = -1
+ PERF_HIGH = 0
+
+ # obtains yaml file
+ path = os.getcwd()
+ # ADD_CIRCUIT
+ CIR_YAML = (
+ path + "/eval_engines/ngspice/ngspice_inputs/yaml_files/zhenxin_s_fc.yaml"
+ )
+
+ def __init__(self, env_config):
+     """
+     Initialize the environment from a configuration dictionary.
+
+     env_config keys recognized:
+     - "multi_goal" (bool): if True, a random goal vector is chosen on every reset; default False.
+     - "generalize" (bool): if True, use precomputed generated specs instead of target_specs from YAML; default False.
+     - "num_valid" (int): only used in the file name when specs are saved; default 50.
+     - "save_specs" (bool): if True, persist loaded specs to a pickle file; default False.
+     - "run_valid" (bool): validation mode flag used when generalize is True; default False.
+
+     Behavior and side effects:
+     - Loads the circuit YAML (CIR_YAML) with the ordered loader and either reads target_specs or loads generated specs from disk.
+     - Stores the specs sorted by name, optionally pickles them, and records spec identifiers and the fixed goal index.
+     - Builds parameter value grids from YAML "params" and stores parameter identifiers.
+     - Initializes the simulator interface (self.sim_env).
+     - Defines an 11-dimensional continuous action space in [-1, 1] and an ActionNormalizer that maps actions to the physical ranges below.
+     - Defines the observation space and the runtime state containers: self.cur_specs, self.cur_params_idx, self.g_star, self.global_g and self.obj_idx.
+
+     No return value.
+     """
+     self.multi_goal = env_config.get("multi_goal", False)
+     self.generalize = env_config.get("generalize", False)
+     num_valid = env_config.get("num_valid", 50)
+     self.specs_save = env_config.get("save_specs", False)
+     self.valid = env_config.get("run_valid", False)
+
+     self.env_steps = 0
+     # ADD_CIRCUIT
+     # NOTE(review): OrderedDictYAMLLoader derives from the full yaml.Loader;
+     # CIR_YAML must only ever point at a trusted file.
+     with open(Zhenxin_S_FC.CIR_YAML, "r") as f:
+         yaml_data = yaml.load(f, OrderedDictYAMLLoader)
+
+     # design specs
+     if not self.generalize:
+         specs = yaml_data["target_specs"]
+     else:
+         load_specs_path = (
+             Zhenxin_S_FC.path
+             + "/autockt/gen_specs/ngspice_specs_gen_zhenxin_s_fc"  # ADD_CIRCUIT
+         )
+         # NOTE(review): pickle.load can execute code from the file; the
+         # generated spec file must be trusted.
+         with open(load_specs_path, "rb") as f:
+             specs = pickle.load(f)
+
+     self.specs = OrderedDict(sorted(specs.items(), key=lambda k: k[0]))
+     if self.specs_save:
+         with open(
+             "specs_" + str(num_valid) + str(random.randint(1, 100000)), "wb"
+         ) as f:
+             pickle.dump(self.specs, f)
+
+     self.specs_ideal = []
+     self.specs_id = list(self.specs.keys())
+     self.fixed_goal_idx = -1
+     self.num_os = len(list(self.specs.values())[0])
+
+     # param array: every YAML entry is passed to np.linspace as
+     # (start, stop, num)
+     params = yaml_data["params"]
+     self.params_id = list(params.keys())
+     self.params = [
+         np.linspace(value[0], value[1], value[2]) for value in params.values()
+     ]
+
+     # initialize sim environment
+     # ADD CIRCUIT
+     self.sim_env = Zhenxin_S_FC_Class(
+         yaml_path=Zhenxin_S_FC.CIR_YAML, num_process=1, path=Zhenxin_S_FC.path
+     )
+
+     # ADD_CIRCUIT
+     self.action_space = spaces.Box(low=-1, high=1, shape=(11,), dtype=np.float64)
+
+     # Physical bounds for the 11 action entries: six entries sharing one
+     # range, four sharing another, and one final entry.
+     action_space_low = np.array([130] * 6 + [0.0001] * 4 + [0.01])
+     action_space_high = np.array([100000] * 6 + [1.0] * 4 + [10])
+
+     self.action_normalizer = ActionNormalizer(
+         action_space_low=action_space_low, action_space_high=action_space_high
+     )
+
+     # NOTE(review): the parameter part of the box has low == high == 1 —
+     # confirm the bounds against what the observation actually carries.
+     self.observation_space = spaces.Box(
+         low=np.array(
+             [Zhenxin_S_FC.PERF_LOW] * 2 * len(self.specs_id)
+             + len(self.params_id) * [1]
+         ),
+         high=np.array(
+             [Zhenxin_S_FC.PERF_HIGH] * 2 * len(self.specs_id)
+             + len(self.params_id) * [1]
+         ),
+     )
+
+     # initialize current param/spec observations
+     self.cur_specs = np.zeros(len(self.specs_id), dtype=np.float32)
+     self.cur_params_idx = np.zeros(len(self.params_id), dtype=np.int32)
+
+     # g*: the goal formed by the last entry of every spec list
+     self.g_star = np.array(
+         [float(spec[self.fixed_goal_idx]) for spec in self.specs.values()]
+     )
+     # normalization constants read from the YAML "normalize" entry
+     self.global_g = np.array(yaml_data["normalize"])
+
+     # objective number (used for validation)
+     self.obj_idx = 0
+
+    def reset(self):
+        """Start a new episode and return its first observation.
+
+        Target selection (the configuration flags are tested for truthiness):
+
+        - ``generalize`` and ``valid``: step through the stored targets in
+          order using ``self.obj_idx``, wrapping to 0 past the last one.
+        - ``generalize`` without ``valid``: pick a stored target at random.
+        - ``multi_goal`` without ``generalize``: pick a stored target at
+          random.
+        - neither ``generalize`` nor ``multi_goal``: use ``self.g_star``.
+
+        The parameters are then set to a fixed 11-value starting point and
+        simulated through ``self.update``. Both the simulated specs and the
+        target specs are normalised against ``self.global_g`` with
+        ``self.lookup``.
+
+        Returns:
+            numpy.ndarray: normalised current specs, normalised target specs
+            and the raw parameter values, concatenated in that order.
+        """
+        target_idx = None
+        if self.generalize:
+            if self.valid:
+                # Validation walks the targets deterministically.
+                if self.obj_idx > self.num_os - 1:
+                    self.obj_idx = 0
+                target_idx = self.obj_idx
+                self.obj_idx += 1
+            else:
+                target_idx = random.randint(0, self.num_os - 1)
+        elif self.multi_goal:
+            target_idx = random.randint(0, self.num_os - 1)
+
+        if target_idx is None:
+            self.specs_ideal = self.g_star
+        else:
+            # Column ``target_idx`` of every stored spec forms one target.
+            self.specs_ideal = np.array(
+                [spec[target_idx] for spec in self.specs.values()]
+            )
+
+        # Express the target relative to the global normalisation vector.
+        self.specs_ideal_norm = self.lookup(self.specs_ideal, self.global_g)
+
+        # ADD_CIRCUIT
+        # Fixed starting point. Despite the attribute name these are parameter
+        # values (not grid indices), in the order update() zips them with its
+        # parameter names.
+        # fmt: off
+        self.cur_params_idx = np.array([
+            242.84323390575366,
+            133.8078576088182,
+            292.64831036522917,
+            198.92456090951285,
+            139.80927503463723,
+            131.13102449401783,
+            0.6568611016690267,
+            0.17811360700059536,
+            0.686108575948138,
+            0.3053737857576733,
+            1.0,
+        ])
+        # fmt: on
+
+        self.cur_specs = self.update(self.cur_params_idx)
+        cur_spec_norm = self.lookup(self.cur_specs, self.global_g)
+
+        # Observation: normalised current specs, normalised target, parameters.
+        self.ob = np.concatenate(
+            [cur_spec_norm, self.specs_ideal_norm, self.cur_params_idx]
+        )
+        return self.ob
+
+    def step(self, action):
+        """Apply one agent action, simulate the design and score it.
+
+        ``action`` is given in the agent's action space (values in [-1, 1])
+        and is converted to parameter values by
+        ``self.action_normalizer.action``. The converted vector replaces the
+        current parameters; it is not added to them.
+
+        Parameters:
+            action (array-like): one entry per circuit parameter.
+
+        Returns:
+            tuple: ``(observation, reward, done, info)`` where the observation
+            is the concatenation of normalised current specs, normalised
+            target specs and the parameter values, ``done`` is True when the
+            reward is at least 10, and ``info`` is an empty dict.
+        """
+        # Convert the [-1, 1] action back to the real parameter range.
+        self.cur_params_idx = self.action_normalizer.action(action)
+
+        # Simulate the new design and normalise its specs.
+        self.cur_specs = self.update(self.cur_params_idx)
+        normalized_specs = self.lookup(self.cur_specs, self.global_g)
+        reward = self.reward(self.cur_specs, self.specs_ideal)
+
+        # A reward of 10 or more ends the episode.
+        done = bool(reward >= 10)
+        if done:
+            separator = "-" * 10
+            print(separator)
+            print("params = ", self.cur_params_idx)
+            print("specs:", self.cur_specs)
+            print("ideal specs:", self.specs_ideal)
+            print("re:", reward)
+            print(separator)
+
+        self.ob = np.concatenate(
+            [normalized_specs, self.specs_ideal_norm, self.cur_params_idx]
+        )
+        self.env_steps += 1
+
+        logger.info("current specs:" + str(self.cur_specs) + ", reward: " + str(reward))
+        return self.ob, reward, done, {}
+
+    def lookup(self, spec, goal_spec):
+        """Return the element-wise relative difference of ``spec`` to ``goal_spec``.
+
+        Computes ``(spec - goal_spec) / (goal_spec + spec)``. Both arguments
+        are converted to float arrays first, so lists and tuples work as well
+        as numpy arrays.
+
+        Entries where ``goal_spec + spec`` is zero are not special-cased:
+        numpy yields ``inf``/``nan`` there and emits a RuntimeWarning.
+
+        Parameters:
+            spec (array-like): measured values.
+            goal_spec (array-like): reference values, same length as ``spec``.
+
+        Returns:
+            numpy.ndarray: the normalised difference, one entry per spec.
+        """
+        spec = np.asarray(spec, dtype=float)
+        goal_spec = np.asarray(goal_spec, dtype=float)
+        return (spec - goal_spec) / (goal_spec + spec)
+
+    def reward(self, spec, goal_spec):
+        """Score ``spec`` against ``goal_spec``.
+
+        The specs are normalised with ``self.lookup`` and every violated
+        target adds the magnitude of its normalised deviation to a penalty:
+
+        - ``"ibias_max"`` is violated when its deviation is positive
+          (value above the goal).
+        - every other spec (``"gain_min"``, ``"ugbw_min"``, ``"phm_min"``) is
+          violated when its deviation is negative (value below the goal).
+
+        Parameters:
+            spec (array-like): current spec values, ordered like ``self.specs_id``.
+            goal_spec (array-like): target spec values in the same order.
+
+        Returns:
+            The negated penalty when it is below -0.02, otherwise 10.
+        """
+        recognised = ["ibias_max", "gain_min", "ugbw_min", "phm_min"]
+
+        # This is a penalty used for optimisation rather than a conventional
+        # RL reward: it only ever accumulates violations.
+        penalty = 0
+        for position, deviation in enumerate(self.lookup(spec, goal_spec)):
+            spec_name = self.specs_id[position]
+            assert spec_name in recognised
+            if spec_name == "ibias_max":
+                # Upper bound: only overshoot counts.
+                violated = deviation > 0
+            else:
+                # Lower bounds: only shortfall counts.
+                violated = deviation < 0
+            if violated:
+                penalty += np.abs(deviation)
+
+        score = -penalty
+        return score if score < -0.02 else 10
+
+    def update(self, params_idx):
+        """Simulate the circuit for ``params_idx`` and return its specs.
+
+        Parameters:
+            params_idx (Sequence[int|float]): parameter values, paired in
+                order with w_m12, w_m3, w_m45, w_m67, w_m89, w_m1011, vbp1,
+                vbp2, vbn1, vbn2, cc.
+
+        Returns:
+            numpy.ndarray: the simulated spec values, ordered by ascending
+            spec name.
+        """
+        # ADD_CIRCUIT
+        # fmt: off
+        ordered_names = (
+            "w_m12", "w_m3", "w_m45", "w_m67", "w_m89", "w_m1011",
+            "vbp1", "vbp2", "vbn1", "vbn2", "cc",
+        )
+        # fmt: on
+        design = OrderedDict(zip(ordered_names, params_idx))
+
+        # Element [1] of the simulator's return value maps spec name -> value.
+        simulated = self.sim_env.create_design_and_simulate(design)[1]
+        by_name = sorted(simulated.items(), key=lambda item: item[0])
+        return np.array([value for _, value in by_name])
+
+
+def main():
+    """Build the environment, run one reset and one step, then open IPython."""
+    smoke_env = Zhenxin_S_FC({"generalize": True, "valid": True})
+    smoke_env.reset()
+    # NOTE(review): every entry is 2, which lies outside the Box(-1, 1)
+    # action space declared in __init__ - confirm this is intended.
+    smoke_env.step([2] * 11)
+
+    IPython.embed()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/autockt/eval.py b/autockt/eval.py
index f2d1b8c..15a8975 100644
--- a/autockt/eval.py
+++ b/autockt/eval.py
@@ -18,6 +18,34 @@
# from bag_deep_ckt.autockt.envs.bag_opamp_discrete import TwoStageAmp
from envs.ngspice_vanilla_opamp import TwoStageAmp
+from envs.ngspice_ledro_d_fc import LEDRO_D_FC
+
+from loguru import logger
+import sys
+
+# Shared layout for both sinks: timestamp | level | module:function:line - message
+log_format = (
+ "{time:YYYY-MM-DD HH:mm:ss.SSS} | "
+ "{level: <8} | "
+ "{module}:{function}:{line} - "
+ "{message}"
+)
+
+# Remove every handler registered so far, so only the two sinks below remain
+logger.remove()
+
+# Sink 1: stdout, DEBUG and above
+logger.add(sys.stdout, format=log_format, level="DEBUG")
+
+# Sink 2: logs/eval.log (relative path), new file each day, kept for 7 days
+logger.add(
+ "logs/eval.log",
+ format=log_format,
+ level="DEBUG",
+ rotation="1 day",
+ retention="7 days",
+)
+
EXAMPLE_USAGE = """
Example Usage via RLlib CLI:
@@ -33,6 +61,7 @@
# ModelCatalog.register_custom_model("pa_model", ParametricActionsModel)
# register_env("pa_cartpole", lambda _: ParametricActionCartpole(10))
register_env("opamp-v0", lambda config: TwoStageAmp(config))
+register_env("ledro_d_fc", lambda config: LEDRO_D_FC(config))
def create_parser(parser_creator=None):
@@ -145,6 +174,9 @@ def rollout(agent, env_name, num_steps, out="assdf", no_render=True):
}
if env_name == "opamp-v0":
env = TwoStageAmp(env_config=env_config)
+    # elif, not if: a separate `if` would capture the `else` below and send
+    # "opamp-v0" through gym.make() after TwoStageAmp was already built.
+    elif env_name == "ledro_d_fc":
+        env = LEDRO_D_FC(env_config=env_config)
+
else:
env = gym.make(env_name)
@@ -186,10 +218,21 @@ def rollout(agent, env_name, num_steps, out="assdf", no_render=True):
action = agent.compute_action(state)
action_array.append(action)
+ # action = env.action_space.sample()
+ # action_array.append(action)
+
next_state, reward, done, _ = env.step(action)
- print(action)
- print(reward)
- print(done)
+ logger.debug(
+ "\n action: "
+ + str(action)
+ + "\n reward: "
+ + str(reward)
+ + "\n done: "
+ + str(done)
+ + "\n next_state: "
+ + str(next_state)
+ )
+
reward_total += reward
if not no_render:
env.render()
diff --git a/autockt/gen_specs.py b/autockt/gen_specs.py
index d4d2ac2..a812fca 100644
--- a/autockt/gen_specs.py
+++ b/autockt/gen_specs.py
@@ -6,6 +6,7 @@
import argparse
from collections import OrderedDict
import pickle
+import datetime
# way of ordering the way a yaml file is read
@@ -73,6 +74,19 @@ def gen_data(CIR_YAML, env, num_specs):
with open("autockt/gen_specs/ngspice_specs_gen_" + env, "wb") as f:
pickle.dump(specs_range, f)
+ with open("autockt/gen_specs/ngspice_specs_gen_" + env + ".log", "w") as f:
+ f.write("total specs: " + str(len(specs_range["gain_min"])))
+ for spec in specs_range:
+ f.write("\n")
+ f.write(str(spec) + "\n")
+ f.write(str(specs_range[spec]))
+ # write generated time/date
+ f.write("\n\n")
+ f.write(
+ "Generated at: "
+ + str(datetime.datetime.now().strftime("%d/%m/%Y, %H:%M:%S"))
+ )
+
def main():
parser = argparse.ArgumentParser()
@@ -82,6 +96,15 @@ def main():
gen_data(CIR_YAML, "two_stage_opamp", int(args.num_specs))
+ CIR_YAML = "eval_engines/ngspice/ngspice_inputs/yaml_files/ledro_d_fc.yaml"
+ gen_data(CIR_YAML, "ledro_d_fc", int(args.num_specs))
+
+ CIR_YAML = "eval_engines/ngspice/ngspice_inputs/yaml_files/ledro_d_fc45.yaml"
+ gen_data(CIR_YAML, "ledro_d_fc45", int(args.num_specs))
+
+ CIR_YAML = "eval_engines/ngspice/ngspice_inputs/yaml_files/zhenxin_s_fc.yaml"
+ gen_data(CIR_YAML, "zhenxin_s_fc", int(args.num_specs))
+
if __name__ == "__main__":
main()
diff --git a/autockt/gen_specs/ngspice_specs_gen_ledro_d_fc b/autockt/gen_specs/ngspice_specs_gen_ledro_d_fc
new file mode 100644
index 0000000..de643e4
Binary files /dev/null and b/autockt/gen_specs/ngspice_specs_gen_ledro_d_fc differ
diff --git a/autockt/gen_specs/ngspice_specs_gen_ledro_d_fc45 b/autockt/gen_specs/ngspice_specs_gen_ledro_d_fc45
new file mode 100644
index 0000000..5b414ac
Binary files /dev/null and b/autockt/gen_specs/ngspice_specs_gen_ledro_d_fc45 differ
diff --git a/autockt/gen_specs/ngspice_specs_gen_two_stage_opamp b/autockt/gen_specs/ngspice_specs_gen_two_stage_opamp
index 97c99ba..80d184f 100644
Binary files a/autockt/gen_specs/ngspice_specs_gen_two_stage_opamp and b/autockt/gen_specs/ngspice_specs_gen_two_stage_opamp differ
diff --git a/autockt/gen_specs/ngspice_specs_gen_zhenxin_s_fc b/autockt/gen_specs/ngspice_specs_gen_zhenxin_s_fc
new file mode 100644
index 0000000..8c63da6
Binary files /dev/null and b/autockt/gen_specs/ngspice_specs_gen_zhenxin_s_fc differ
diff --git a/autockt/train_ledro.py b/autockt/train_ledro.py
new file mode 100644
index 0000000..55ba856
--- /dev/null
+++ b/autockt/train_ledro.py
@@ -0,0 +1,55 @@
+import ray
+import ray.tune as tune
+from ray.rllib.agents import ppo
+from autockt.envs.ngspice_vanilla_opamp import TwoStageAmp
+from autockt.envs.ngspice_ledro_d_fc import LEDRO_D_FC
+
+import argparse
+
+parser = argparse.ArgumentParser()
+# Optional checkpoint to resume from; when omitted a fresh training run starts.
+parser.add_argument("--checkpoint_dir", "-cpd", type=str, default=None)
+args = parser.parse_args()
+ray.init()
+
+# PPO hyperparameters; see the Ray RLlib documentation for each key.
+config_train = {
+    "train_batch_size": 1200,
+    # Author's sizing note: 100 discrete values, starting from 33, max step
+    # size = 2 -> 33 + 2*200 = 433.
+    # NOTE(review): the note cited envs/ngspice_ledro_d_dc.py line 210;
+    # presumably envs/ngspice_ledro_d_fc.py - confirm.
+    "horizon": 200,
+    "num_gpus": 0,
+    "model": {"fcnet_hiddens": [128, 128, 128]},
+    "num_workers": 6,
+    "env_config": {"generalize": True, "run_valid": False},
+}
+
+# Results are stored under ~/ray_results/<experiment name>.
+if not args.checkpoint_dir:
+    # Fresh run: train until the mean episode reward reaches -0.02.
+    trials = tune.run_experiments(
+        {
+            "train_7nFinFET_LEDRO_D_FC": {
+                "checkpoint_freq": 10,
+                "run": "PPO",
+                "env": LEDRO_D_FC,
+                "stop": {"episode_reward_mean": -0.02},
+                "config": config_train,
+            },
+        }
+    )
+else:
+    # Resume from the checkpoint given on the command line.
+    print("RESTORING NOW!!!!!!")
+    tune.run_experiments(
+        {
+            "restore_ppo": {
+                "run": "PPO",
+                "config": config_train,
+                "env": LEDRO_D_FC,
+                "restore": args.checkpoint_dir,
+                "checkpoint_freq": 1,
+            },
+        }
+    )
diff --git a/autockt/train_ledro45.py b/autockt/train_ledro45.py
new file mode 100644
index 0000000..e75ed18
--- /dev/null
+++ b/autockt/train_ledro45.py
@@ -0,0 +1,79 @@
+import ray
+import ray.tune as tune
+from ray.rllib.agents import ppo
+from autockt.envs.ngspice_vanilla_opamp import TwoStageAmp
+from autockt.envs.ngspice_ledro_d_fc import LEDRO_D_FC
+from autockt.envs.ngspice_ledro_d_fc45 import LEDRO_D_FC45
+
+
+import argparse
+
+parser = argparse.ArgumentParser()
+# Optional checkpoint to resume from; when omitted a fresh training run starts.
+parser.add_argument("--checkpoint_dir", "-cpd", type=str, default=None)
+args = parser.parse_args()
+ray.init()
+
+# PPO hyperparameters; see the Ray RLlib documentation for each key.
+config_train = {
+    "train_batch_size": 1200,
+    "horizon": 100,
+    "num_gpus": 0,
+    "model": {"fcnet_hiddens": [128, 128, 128]},
+    "num_workers": 6,
+    "env_config": {"generalize": True, "run_valid": False},
+}
+
+# Results are stored under ~/ray_results/<experiment name>.
+# NOTE(review): the experiment name below matches the one in train_ledro.py
+# although this script trains LEDRO_D_FC45 - confirm the shared name is wanted.
+if not args.checkpoint_dir:
+    # Fresh run with no stop criterion; end it manually.
+    trials = tune.run_experiments(
+        {
+            "train_7nFinFET_LEDRO_D_FC": {
+                "checkpoint_freq": 10,
+                "run": "PPO",
+                "env": LEDRO_D_FC45,
+                "config": config_train,
+            },
+        }
+    )
+else:
+    # Resume from the checkpoint given on the command line.
+    print("RESTORING NOW!!!!!!")
+    tune.run_experiments(
+        {
+            "restore_ppo": {
+                "run": "PPO",
+                "config": config_train,
+                "env": LEDRO_D_FC45,
+                "restore": args.checkpoint_dir,
+                "checkpoint_freq": 1,
+            },
+        }
+    )
diff --git a/autockt/train_zhenxin_s_fc_65nm.py b/autockt/train_zhenxin_s_fc_65nm.py
new file mode 100644
index 0000000..a65c177
--- /dev/null
+++ b/autockt/train_zhenxin_s_fc_65nm.py
@@ -0,0 +1,80 @@
+import ray
+import ray.tune as tune
+from ray.rllib.agents import ppo
+from autockt.envs.ngspice_vanilla_opamp import TwoStageAmp
+from autockt.envs.ngspice_ledro_d_fc import LEDRO_D_FC
+
+# ADD_CIRCUIT
+from autockt.envs.ngspice_ledro_d_fc45 import LEDRO_D_FC45
+from autockt.envs.ngspice_zhenxin_s_fc import Zhenxin_S_FC
+
+import argparse
+
+parser = argparse.ArgumentParser()
+# Optional checkpoint to resume from; when omitted a fresh training run starts.
+parser.add_argument("--checkpoint_dir", "-cpd", type=str, default=None)
+args = parser.parse_args()
+ray.init()
+
+# PPO hyperparameters; see the Ray RLlib documentation for each key.
+config_train = {
+    "train_batch_size": 1200,
+    "horizon": 50,
+    "num_gpus": 0,
+    "model": {"fcnet_hiddens": [128, 128, 128]},
+    "num_workers": 6,
+    "env_config": {"generalize": False, "run_valid": False},
+}
+
+# Results are stored under ~/ray_results/<experiment name>.
+if not args.checkpoint_dir:
+    # Fresh run with no stop criterion; end it manually.
+    trials = tune.run_experiments(
+        {
+            "train_65nm_Zhenxin_S_FC": {
+                "checkpoint_freq": 10,
+                "run": "PPO",
+                "env": Zhenxin_S_FC,  # ADD_CIRCUIT
+                "config": config_train,
+            },
+        }
+    )
+else:
+    # Resume from the checkpoint given on the command line. The environment
+    # must be the one the checkpoint was trained on.
+    print("RESTORING NOW!!!!!!")
+    tune.run_experiments(
+        {
+            "restore_ppo": {
+                "run": "PPO",
+                "config": config_train,
+                "env": Zhenxin_S_FC,  # ADD_CIRCUIT
+                "restore": args.checkpoint_dir,
+                "checkpoint_freq": 1,
+            },
+        }
+    )
diff --git a/eval_engines/ngspice/LEDRO_D_FC.py b/eval_engines/ngspice/LEDRO_D_FC.py
new file mode 100644
index 0000000..4ff3ee3
--- /dev/null
+++ b/eval_engines/ngspice/LEDRO_D_FC.py
@@ -0,0 +1,136 @@
+import numpy as np
+import os
+import scipy.interpolate as interp
+import scipy.optimize as sciopt
+import yaml
+import importlib
+import time
+
+debug = False
+
+from eval_engines.ngspice.ngspice_wrapper import NgSpiceWrapper
+
+import random
+import re
+import copy
+
+
+class LEDRO_D_FC_Class(NgSpiceWrapper):
+    """NgSpice wrapper for the LEDRO_D_FC circuit.
+
+    ``create_design`` renders a netlist from a parameter ``state``;
+    ``translate_result`` reads the simulator's ``ac.csv`` / ``dc.csv`` and
+    derives the ``ugbw``, ``gain``, ``phm`` and ``ibias`` specs.
+    """
+
+    # ``state`` keys that create_design rewrites before rendering.
+    _ROUNDED_KEYS = ("nB1", "nB2", "nB3", "nB4", "nB5", "nB6")
+    _NANO_SCALED_KEYS = ("nA1", "nA2", "nA3", "nA4", "nA5", "nA6")
+    _TENTH_SCALED_KEYS = ("vbiasp1", "vbiasp2", "vbiasn0", "vbiasn1", "vbiasn2")
+
+    def translate_result(self, output_path):
+        """Turn the simulation output in ``output_path`` into a spec dict.
+
+        :param output_path: folder holding ``ac.csv`` and ``dc.csv``
+        :return: dict with the keys ``ugbw``, ``gain``, ``phm`` and ``ibias``
+        """
+        freq, vout, ibias = self.parse_output(output_path)
+        gain = self.find_dc_gain(vout)
+        ugbw = self.find_ugbw(freq, vout)
+        phm = self.find_phm(freq, vout)
+
+        return dict(ugbw=ugbw, gain=gain, phm=phm, ibias=ibias)
+
+    def parse_output(self, output_path):
+        """Read ``ac.csv`` and ``dc.csv`` from ``output_path``.
+
+        :return: ``(freq, vout, ibias)`` where ``freq`` is column 0 of the AC
+            data, ``vout`` is the complex value built from columns 1 (real)
+            and 2 (imaginary), and ``ibias`` is the negated second value of
+            the DC data.
+        :raises FileNotFoundError: if either file is missing
+        """
+        ac_fname = os.path.join(output_path, "ac.csv")
+        dc_fname = os.path.join(output_path, "dc.csv")
+
+        # Fail with a clear message instead of printing and then failing
+        # inside genfromtxt.
+        if not os.path.isfile(ac_fname) or not os.path.isfile(dc_fname):
+            raise FileNotFoundError("ac/dc file doesn't exist: %s" % output_path)
+
+        ac_raw_outputs = np.genfromtxt(ac_fname, skip_header=1)
+        dc_raw_outputs = np.genfromtxt(dc_fname, skip_header=1)
+        freq = ac_raw_outputs[:, 0]
+        vout = ac_raw_outputs[:, 1] + 1j * ac_raw_outputs[:, 2]
+        ibias = -dc_raw_outputs[1]
+
+        return freq, vout, ibias
+
+    def find_dc_gain(self, vout):
+        """Return the magnitude of the first AC sample."""
+        return np.abs(vout)[0]
+
+    def find_ugbw(self, freq, vout):
+        """Return the frequency where ``|vout|`` crosses 1, or ``freq[0]`` if none."""
+        ugbw, valid = self._get_best_crossing(freq, np.abs(vout), val=1)
+        return ugbw if valid else freq[0]
+
+    def find_phm(self, freq, vout):
+        """Return the phase margin in degrees, or -180 without a unity-gain crossing."""
+        ugbw, valid = self._get_best_crossing(freq, np.abs(vout), val=1)
+        if not valid:
+            return -180
+
+        # Unwrap before converting so the interpolation sees a continuous phase.
+        phase = np.rad2deg(np.unwrap(np.angle(vout, deg=False)))
+        phase_at_ugbw = interp.interp1d(freq, phase, kind="quadratic")(ugbw)
+        if phase_at_ugbw > 0:
+            return -180 + phase_at_ugbw
+        return 180 + phase_at_ugbw
+
+    @classmethod
+    def _get_best_crossing(cls, xvec, yvec, val):
+        """Find where the spline through ``(xvec, yvec)`` equals ``val``.
+
+        :return: ``(x, True)`` for a root found by ``brentq`` between the
+            first and last ``xvec`` entries, or ``(xvec[-1], False)`` when
+            ``brentq`` raises ``ValueError``.
+        """
+        interp_fun = interp.InterpolatedUnivariateSpline(xvec, yvec)
+
+        def fzero(x):
+            return interp_fun(x) - val
+
+        xstart, xstop = xvec[0], xvec[-1]
+        try:
+            return sciopt.brentq(fzero, xstart, xstop), True
+        except ValueError:
+            # No crossing inside the range.
+            return xstop, False
+
+    def create_design(self, state, new_fname):
+        """Render the netlist for ``state`` into a new design folder.
+
+        ``state`` is modified in place: the ``nB*`` entries are rounded to
+        int, the ``nA*`` entries are multiplied by 1e-9, the ``vbias*``
+        entries are divided by 10, and ``design_path`` is added.
+
+        :param state: mapping of template variables
+        :param new_fname: base name of the design folder and ``.cir`` file
+        :return: ``(design_folder, fpath)``
+        """
+        # A random suffix (0..10000) keeps folders for the same name apart.
+        design_folder = os.path.join(self.gen_dir, new_fname) + str(
+            random.randint(0, 10000)
+        )
+        os.makedirs(design_folder, exist_ok=True)
+
+        fpath = os.path.join(design_folder, new_fname + ".cir")
+
+        for key in self._ROUNDED_KEYS:
+            state[key] = int(round(state[key]))
+        for key in self._NANO_SCALED_KEYS:
+            state[key] = state[key] * 1e-9
+        for key in self._TENTH_SCALED_KEYS:
+            state[key] = state[key] / 10
+        state["design_path"] = design_folder
+
+        with open(fpath, "w") as f:
+            # Render the design netlist with the current state.
+            f.write(self.design_template.render(state))
+
+        return design_folder, fpath
diff --git a/eval_engines/ngspice/LEDRO_D_FC45.py b/eval_engines/ngspice/LEDRO_D_FC45.py
new file mode 100644
index 0000000..e6046ef
--- /dev/null
+++ b/eval_engines/ngspice/LEDRO_D_FC45.py
@@ -0,0 +1,117 @@
+import numpy as np
+import os
+import scipy.interpolate as interp
+import scipy.optimize as sciopt
+import yaml
+import importlib
+import time
+
+debug = False
+
+from eval_engines.ngspice.ngspice_wrapper import NgSpiceWrapper
+
+import random
+import re
+import copy
+
+
+class LEDRO_D_FC45_Class(NgSpiceWrapper):
+    """NgSpice wrapper for the LEDRO_D_FC45 circuit.
+
+    ``create_design`` renders a netlist from a parameter ``state``;
+    ``translate_result`` reads the simulator's ``ac.csv`` / ``dc.csv`` and
+    derives the ``ugbw``, ``gain``, ``phm`` and ``ibias`` specs.
+    """
+
+    def translate_result(self, output_path):
+        """Turn the simulation output in ``output_path`` into a spec dict.
+
+        :param output_path: folder holding ``ac.csv`` and ``dc.csv``
+        :return: dict with the keys ``ugbw``, ``gain``, ``phm`` and ``ibias``
+        """
+        freq, vout, ibias = self.parse_output(output_path)
+        gain = self.find_dc_gain(vout)
+        ugbw = self.find_ugbw(freq, vout)
+        phm = self.find_phm(freq, vout)
+
+        return dict(ugbw=ugbw, gain=gain, phm=phm, ibias=ibias)
+
+    def parse_output(self, output_path):
+        """Read ``ac.csv`` and ``dc.csv`` from ``output_path``.
+
+        :return: ``(freq, vout, ibias)`` where ``freq`` is column 0 of the AC
+            data, ``vout`` is the complex value built from columns 1 (real)
+            and 2 (imaginary), and ``ibias`` is the negated second value of
+            the DC data.
+        :raises FileNotFoundError: if either file is missing
+        """
+        ac_fname = os.path.join(output_path, "ac.csv")
+        dc_fname = os.path.join(output_path, "dc.csv")
+
+        # Fail with a clear message instead of printing and then failing
+        # inside genfromtxt.
+        if not os.path.isfile(ac_fname) or not os.path.isfile(dc_fname):
+            raise FileNotFoundError("ac/dc file doesn't exist: %s" % output_path)
+
+        ac_raw_outputs = np.genfromtxt(ac_fname, skip_header=1)
+        dc_raw_outputs = np.genfromtxt(dc_fname, skip_header=1)
+        freq = ac_raw_outputs[:, 0]
+        vout = ac_raw_outputs[:, 1] + 1j * ac_raw_outputs[:, 2]
+        ibias = -dc_raw_outputs[1]
+
+        return freq, vout, ibias
+
+    def find_dc_gain(self, vout):
+        """Return the magnitude of the first AC sample."""
+        return np.abs(vout)[0]
+
+    def find_ugbw(self, freq, vout):
+        """Return the frequency where ``|vout|`` crosses 1, or ``freq[0]`` if none."""
+        ugbw, valid = self._get_best_crossing(freq, np.abs(vout), val=1)
+        return ugbw if valid else freq[0]
+
+    def find_phm(self, freq, vout):
+        """Return the phase margin in degrees, or -180 without a unity-gain crossing."""
+        ugbw, valid = self._get_best_crossing(freq, np.abs(vout), val=1)
+        if not valid:
+            return -180
+
+        # Unwrap before converting so the interpolation sees a continuous phase.
+        phase = np.rad2deg(np.unwrap(np.angle(vout, deg=False)))
+        phase_at_ugbw = interp.interp1d(freq, phase, kind="quadratic")(ugbw)
+        if phase_at_ugbw > 0:
+            return -180 + phase_at_ugbw
+        return 180 + phase_at_ugbw
+
+    @classmethod
+    def _get_best_crossing(cls, xvec, yvec, val):
+        """Find where the spline through ``(xvec, yvec)`` equals ``val``.
+
+        :return: ``(x, True)`` for a root found by ``brentq`` between the
+            first and last ``xvec`` entries, or ``(xvec[-1], False)`` when
+            ``brentq`` raises ``ValueError``.
+        """
+        interp_fun = interp.InterpolatedUnivariateSpline(xvec, yvec)
+
+        def fzero(x):
+            return interp_fun(x) - val
+
+        xstart, xstop = xvec[0], xvec[-1]
+        try:
+            return sciopt.brentq(fzero, xstart, xstop), True
+        except ValueError:
+            # No crossing inside the range.
+            return xstop, False
+
+    def create_design(self, state, new_fname):
+        """Render the netlist for ``state`` into a new design folder.
+
+        ``state`` is modified in place: ``design_path`` is added to it.
+
+        :param state: mapping of template variables
+        :param new_fname: base name of the design folder and ``.cir`` file
+        :return: ``(design_folder, fpath)``
+        """
+        # A random suffix (0..10000) keeps folders for the same name apart.
+        design_folder = os.path.join(self.gen_dir, new_fname) + str(
+            random.randint(0, 10000)
+        )
+        os.makedirs(design_folder, exist_ok=True)
+
+        fpath = os.path.join(design_folder, new_fname + ".cir")
+        state["design_path"] = design_folder
+
+        with open(fpath, "w") as f:
+            # Render the design netlist with the current state.
+            f.write(self.design_template.render(state))
+
+        return design_folder, fpath
diff --git a/eval_engines/ngspice/Zhenxin_S_FC.py b/eval_engines/ngspice/Zhenxin_S_FC.py
new file mode 100644
index 0000000..39efd0e
--- /dev/null
+++ b/eval_engines/ngspice/Zhenxin_S_FC.py
@@ -0,0 +1,117 @@
+import numpy as np
+import os
+import scipy.interpolate as interp
+import scipy.optimize as sciopt
+import yaml
+import importlib
+import time
+
+debug = False
+
+from eval_engines.ngspice.ngspice_wrapper import NgSpiceWrapper
+
+import random
+import re
+import copy
+
+
+class Zhenxin_S_FC_Class(NgSpiceWrapper):
+
+ def translate_result(self, output_path):
+ """
+
+ :param output_path:
+ :return
+ result: dict(spec_kwds, spec_value)
+ """
+
+ # use parse output here
+ freq, vout, ibias = self.parse_output(output_path)
+ gain = self.find_dc_gain(vout)
+ ugbw = self.find_ugbw(freq, vout)
+ phm = self.find_phm(freq, vout)
+
+ spec = dict(ugbw=ugbw, gain=gain, phm=phm, ibias=ibias)
+
+ return spec
+
+ def parse_output(self, output_path):
+
+ ac_fname = os.path.join(output_path, "ac.csv")
+ dc_fname = os.path.join(output_path, "dc.csv")
+
+ if not os.path.isfile(ac_fname) or not os.path.isfile(dc_fname):
+ print("ac/dc file doesn't exist: %s" % output_path)
+
+ ac_raw_outputs = np.genfromtxt(ac_fname, skip_header=1)
+ dc_raw_outputs = np.genfromtxt(dc_fname, skip_header=1)
+ freq = ac_raw_outputs[:, 0]
+ vout_real = ac_raw_outputs[:, 1]
+ vout_imag = ac_raw_outputs[:, 2]
+ vout = vout_real + 1j * vout_imag
+ ibias = -dc_raw_outputs[1]
+
+ return freq, vout, ibias
+
+ def find_dc_gain(self, vout):
+ return np.abs(vout)[0]
+
+ def find_ugbw(self, freq, vout):
+ gain = np.abs(vout)
+ ugbw, valid = self._get_best_crossing(freq, gain, val=1)
+ if valid:
+ return ugbw
+ else:
+ return freq[0]
+
+ def find_phm(self, freq, vout):
+ gain = np.abs(vout)
+ phase = np.angle(vout, deg=False)
+ phase = np.unwrap(phase) # unwrap the discontinuity
+ phase = np.rad2deg(phase) # convert to degrees
+ #
+ # plt.subplot(211)
+ # plt.plot(np.log10(freq[:200]), 20*np.log10(gain[:200]))
+ # plt.subplot(212)
+ # plt.plot(np.log10(freq[:200]), phase)
+
+ phase_fun = interp.interp1d(freq, phase, kind="quadratic")
+ ugbw, valid = self._get_best_crossing(freq, gain, val=1)
+ if valid:
+ if phase_fun(ugbw) > 0:
+ return -180 + phase_fun(ugbw)
+ else:
+ return 180 + phase_fun(ugbw)
+ else:
+ return -180
+
+ def _get_best_crossing(cls, xvec, yvec, val):
+ interp_fun = interp.InterpolatedUnivariateSpline(xvec, yvec)
+
+ def fzero(x):
+ return interp_fun(x) - val
+
+ xstart, xstop = xvec[0], xvec[-1]
+ try:
+ return sciopt.brentq(fzero, xstart, xstop), True
+ except ValueError:
+ # avoid no solution
+ # if abs(fzero(xstart)) < abs(fzero(xstop)):
+ # return xstart
+ return xstop, False
+
+ def create_design(self, state, new_fname):
+ design_folder = os.path.join(self.gen_dir, new_fname) + str(
+ random.randint(0, 10000)
+ )
+ os.makedirs(design_folder, exist_ok=True)
+
+ fpath = os.path.join(design_folder, new_fname + ".cir")
+ state["design_path"] = design_folder
+ with open(fpath, "w") as f:
+ # render the design netlist with the current state
+ netlist_str = self.design_template.render(state)
+ f.write(netlist_str)
+ f.close()
+
+ return design_folder, fpath
diff --git a/eval_engines/ngspice/ngspice_inputs/netlist/Zhenxin_S_FC.cir b/eval_engines/ngspice/ngspice_inputs/netlist/Zhenxin_S_FC.cir
new file mode 100644
index 0000000..2951a24
--- /dev/null
+++ b/eval_engines/ngspice/ngspice_inputs/netlist/Zhenxin_S_FC.cir
@@ -0,0 +1,64 @@
+*Zhenxin_S_FC
+
+.include "/home/pham/code/analog-ml/AutoCkt/eval_engines/ngspice/ngspice_inputs/spice_models/65nm_bulk.txt"
+
+* Parameters
+.param tempc=25.0
+.param wm12={{w_m12}}n lm12=130n mm12=1
+.param wm3={{w_m3}}n lm3=130n mm3=1
+.param wm45={{w_m45}}n lm45=130n mm45=1
+.param wm67={{w_m67}}n lm67=130n mm67=1
+.param wm89={{w_m89}}n lm89=130n mm89=1
+.param wm1011={{w_m1011}}n lm1011=130n mm1011=1
+
+.param vbp1={{vbp1}}
+.param vbp2={{vbp2}}
+.param vbn1={{vbn1}}
+.param vbn2={{vbn2}}
+
+.param vdd=1.2
+.param vcm=0.6
+
+
+M3 N004 Vbp1 VDD VDD pmos W={wm3} L={lm3} m={mm3}
+M4 N002 N001 VDD VDD pmos W={wm45} L={lm45} m={mm45}
+M5 N003 N001 VDD VDD pmos W={wm45} L={lm45} m={mm45}
+M7 Vout Vbp2 N003 N003 pmos W={wm67} L={lm67} m={mm67}
+M6 N001 Vbp2 N002 N002 pmos W={wm67} L={lm67} m={mm67}
+M1 N006 Vinp N004 N004 pmos W={wm12} L={lm12} m={mm12}
+M2 N005 Vinn N004 N004 pmos W={wm12} L={lm12} m={mm12}
+M8 N001 Vbn1 N006 N006 nmos W={wm89} L={lm89} m={mm89}
+M9 Vout Vbn1 N005 N005 nmos W={wm89} L={lm89} m={mm89}
+M10 N006 Vbn2 0 0 nmos W={wm1011} L={lm1011} m={mm1011}
+M11 N005 Vbn2 0 0 nmos W={wm1011} L={lm1011} m={mm1011}
+
+
+
+vin in 0 dc=0 ac=1.0
+ein1 Vinp cm in 0 0.5
+ein2 Vinn cm in 0 -0.5
+vcm cm 0 dc={vcm}
+
+vdd VDD 0 dc=1.2
+vss 0 VSS dc=0
+Ccomp N001 Vout {{cc}}p
+Cload Vout 0 1p
+
+VBP1 Vbp1 0 DC {vbp1}
+VBP2 Vbp2 0 DC {vbp2}
+VBN1 Vbn1 0 DC {vbn1}
+VBN2 Vbn2 0 DC {vbn2}
+
+.ac dec 10 1 10G
+
+.control
+run
+set units=degrees
+set wr_vecnames
+option numdgt=7
+wrdata {{design_path}}/ac.csv v(Vout)
+op
+wrdata {{design_path}}/dc.csv i(vdd)
+.endc
+
+.end
diff --git a/eval_engines/ngspice/ngspice_inputs/netlist/ledro_d_fc.cir b/eval_engines/ngspice/ngspice_inputs/netlist/ledro_d_fc.cir
new file mode 100644
index 0000000..3d10624
--- /dev/null
+++ b/eval_engines/ngspice/ngspice_inputs/netlist/ledro_d_fc.cir
@@ -0,0 +1,109 @@
+*fully_differential_folded_cascode.css (ledro)
+
+.include /home/pham/shared_files/ngspice/Modelcards/PTM-MG/lstp/7nfet.pm
+.include /home/pham/shared_files/ngspice/Modelcards/PTM-MG/lstp/7pfet.pm
+
+* Parameters
+.param tempc=27.0
+.param nA1={{nA1}} nB1={{nB1}}
+.param nA2={{nA2}} nB2={{nB2}}
+.param nA3={{nA3}} nB3={{nB3}}
+.param nA4={{nA4}} nB4={{nB4}}
+.param nA5={{nA5}} nB5={{nB5}}
+.param nA6={{nA6}} nB6={{nB6}}
+.param vdd=0.8 vcm=0.4 vbiasp1={{vbiasp1}} vbiasp2={{vbiasp2}}
+.param vbiasn0={{vbiasn0}} vbiasn1={{vbiasn1}} vbiasn2={{vbiasn2}}
+
+NM6 Voutp Vbiasp2 net23 vdd pfet L={nA1} NFIN={nB1}
+NM5 Voutn Vbiasp2 net24 vdd pfet L={nA1} NFIN={nB1}
+NM2 net23 Vbiasp1 vdd vdd pfet L={nA2} NFIN={nB2}
+NM1 net24 Vbiasp1 vdd vdd pfet L={nA2} NFIN={nB2}
+NM8 Voutp Vbiasn2 net27 0 nfet L={nA3} NFIN={nB3}
+NM7 Voutn Vbiasn2 net25 0 nfet L={nA3} NFIN={nB3}
+NM3 net24 Vinp net13 0 nfet L={nA4} NFIN={nB4}
+NM0 net23 Vinn net13 0 nfet L={nA4} NFIN={nB4}
+NM10 net27 Vbiasn1 0 0 nfet L={nA5} NFIN={nB5}
+NM9 net25 Vbiasn1 0 0 nfet L={nA5} NFIN={nB5}
+NM4 net13 Vbiasn0 0 0 nfet L={nA6} NFIN={nB6}
+
+* Voltage sources
+* VS gnd 0 DC 0
+V0 vdd 0 DC {vdd}
+V2 in 0 DC 0 AC 1
+E1 Vinp cm in 0 0.5
+E0 Vinn cm in 0 -0.5
+V1 cm 0 DC {vcm}
+VP1 Vbiasp1 0 DC {vbiasp1}
+VP2 Vbiasp2 0 DC {vbiasp2}
+VN Vbiasn0 0 DC {vbiasn0}
+VN1 Vbiasn1 0 DC {vbiasn1}
+VN2 Vbiasn2 0 DC {vbiasn2}
+
+
+.control
+op
+pre_osdi /home/pham/shared_files/ngspice/osdilibs/bsimcmg.osdi
+set xbrushwidth=3
+set filetype=ascii
+run
+
+let vgs_nm0 = v(Vinn) - v(net13)
+let vds_nm0 = v(net23) - v(net13)
+
+let vgs_nm1 = v(Vbiasp1) - v(vdd)
+let vds_nm1 = v(net24) - v(vdd)
+
+let vgs_nm2 = v(Vbiasp1) - v(vdd)
+let vds_nm2 = v(net23) - v(vdd)
+
+let vgs_nm3 = v(Vinp) - v(net13)
+let vds_nm3 = v(net24) - v(net13)
+
+let vgs_nm4 = v(Vbiasn0)
+let vds_nm4 = v(net13)
+
+let vgs_nm5 = v(Vbiasp2) - v(net24)
+let vds_nm5 = v(Voutn) - v(net24)
+
+let vgs_nm6 = v(Vbiasp2) - v(net23)
+let vds_nm6 = v(Voutp) - v(net23)
+
+let vgs_nm7 = v(Vbiasn2) - v(net25)
+let vds_nm7 = v(Voutn) - v(net25)
+
+let vgs_nm8 = v(Vbiasn2) - v(net27)
+let vds_nm8 = v(Voutp) - v(net27)
+
+let vgs_nm9 = v(Vbiasn1)
+let vds_nm9 = v(net25)
+
+let vgs_nm10 = v(Vbiasn1)
+let vds_nm10 = v(net27)
+
+write output.log I(V0) @nm0[gm] @nm0[ids] @nm0[vth] vgs_nm0 vds_nm0
++ @nm1[gm] @nm1[ids] @nm1[vth] vgs_nm1 vds_nm1
++ @nm2[gm] @nm2[ids] @nm2[vth] vgs_nm2 vds_nm2
++ @nm3[gm] @nm3[ids] @nm3[vth] vgs_nm3 vds_nm3
++ @nm4[gm] @nm4[ids] @nm4[vth] vgs_nm4 vds_nm4
++ @nm5[gm] @nm5[ids] @nm5[vth] vgs_nm5 vds_nm5
++ @nm6[gm] @nm6[ids] @nm6[vth] vgs_nm6 vds_nm6
++ @nm7[gm] @nm7[ids] @nm7[vth] vgs_nm7 vds_nm7
++ @nm8[gm] @nm8[ids] @nm8[vth] vgs_nm8 vds_nm8
++ @nm9[gm] @nm9[ids] @nm9[vth] vgs_nm9 vds_nm9
++ @nm10[gm] @nm10[ids] @nm10[vth] vgs_nm10 vds_nm10
+
+
+ac dec 10 1 100G
+run
+set units=degrees
+set wr_vecnames
+option numdgt=7
+wrdata {{design_path}}/ac.csv v(Voutp)-v(Voutn)
+
+
+op
+wrdata {{design_path}}/dc.csv i(V0)
+quit
+.endc
+
+.end
\ No newline at end of file
diff --git a/eval_engines/ngspice/ngspice_inputs/netlist/ledro_d_fc45.cir b/eval_engines/ngspice/ngspice_inputs/netlist/ledro_d_fc45.cir
new file mode 100644
index 0000000..223175a
--- /dev/null
+++ b/eval_engines/ngspice/ngspice_inputs/netlist/ledro_d_fc45.cir
@@ -0,0 +1,107 @@
+*fully_differential_folded_cascode.css (ledro)
+
+.include "/home/pham/code/analog-ml/AutoCkt/eval_engines/ngspice/ngspice_inputs/spice_models/45nm_bulk.txt"
+
+* Parameters
+.param tempc=27.0
+.param wp1={{wp1}}u lp1={{lp1}}n mp1={{mp1}}
+.param wp2={{wp2}}u lp2={{lp2}}n mp2={{mp2}}
+.param wp3={{wp3}}u lp3={{lp3}}n mp3={{mp3}}
+.param wp4={{wp4}}u lp4={{lp4}}n mp4={{mp4}}
+.param wp5={{wp5}}u lp5={{lp5}}n mp5={{mp5}}
+.param wp6={{wp6}}u lp6={{lp6}}n mp6={{mp6}}
+.param vdd=1.2 vcm=0.6 vbiasp1={{vbiasp1}} vbiasp2={{vbiasp2}}
+.param vbiasn0={{vbiasn0}} vbiasn1={{vbiasn1}} vbiasn2={{vbiasn2}}
+
+M6 Voutp Vbiasp2 net23 vdd pmos W={wp1} L={lp1} m={mp1}
+M5 Voutn Vbiasp2 net24 vdd pmos W={wp1} L={lp1} m={mp1}
+M2 net23 Vbiasp1 vdd vdd pmos W={wp2} L={lp2} m={mp2}
+M1 net24 Vbiasp1 vdd vdd pmos W={wp2} L={lp2} m={mp2}
+M8 Voutp Vbiasn2 net27 0 nmos W={wp3} L={lp3} m={mp3}
+M7 Voutn Vbiasn2 net25 0 nmos W={wp3} L={lp3} m={mp3}
+M3 net24 Vinp net13 0 nmos W={wp4} L={lp4} m={mp4}
+M0 net23 Vinn net13 0 nmos W={wp4} L={lp4} m={mp4}
+M10 net27 Vbiasn1 0 0 nmos W={wp5} L={lp5} m={mp5}
+M9 net25 Vbiasn1 0 0 nmos W={wp5} L={lp5} m={mp5}
+M4 net13 Vbiasn0 0 0 nmos W={wp6} L={lp6} m={mp6}
+
+* Voltage sources
+* VS gnd 0 DC 0
+V0 vdd 0 DC {vdd}
+V2 in 0 DC 0 AC 1
+E1 Vinp cm in 0 0.5
+E0 Vinn cm in 0 -0.5
+V1 cm 0 DC {vcm}
+VP1 Vbiasp1 0 DC {vbiasp1}
+VP2 Vbiasp2 0 DC {vbiasp2}
+VN Vbiasn0 0 DC {vbiasn0}
+VN1 Vbiasn1 0 DC {vbiasn1}
+VN2 Vbiasn2 0 DC {vbiasn2}
+
+
+.control
+op
+set xbrushwidth=3
+set filetype=ascii
+run
+
+let vgs_nm0 = v(Vinn) - v(net13)
+let vds_nm0 = v(net23) - v(net13)
+
+let vgs_nm1 = v(Vbiasp1) - v(vdd)
+let vds_nm1 = v(net24) - v(vdd)
+
+let vgs_nm2 = v(Vbiasp1) - v(vdd)
+let vds_nm2 = v(net23) - v(vdd)
+
+let vgs_nm3 = v(Vinp) - v(net13)
+let vds_nm3 = v(net24) - v(net13)
+
+let vgs_nm4 = v(Vbiasn0)
+let vds_nm4 = v(net13)
+
+let vgs_nm5 = v(Vbiasp2) - v(net24)
+let vds_nm5 = v(Voutn) - v(net24)
+
+let vgs_nm6 = v(Vbiasp2) - v(net23)
+let vds_nm6 = v(Voutp) - v(net23)
+
+let vgs_nm7 = v(Vbiasn2) - v(net25)
+let vds_nm7 = v(Voutn) - v(net25)
+
+let vgs_nm8 = v(Vbiasn2) - v(net27)
+let vds_nm8 = v(Voutp) - v(net27)
+
+let vgs_nm9 = v(Vbiasn1)
+let vds_nm9 = v(net25)
+
+let vgs_nm10 = v(Vbiasn1)
+let vds_nm10 = v(net27)
+
+write output.log I(V0) @m0[gm] @m0[ids] @m0[vth] vgs_nm0 vds_nm0
++ @m1[gm] @m1[ids] @m1[vth] vgs_nm1 vds_nm1
++ @m2[gm] @m2[ids] @m2[vth] vgs_nm2 vds_nm2
++ @m3[gm] @m3[ids] @m3[vth] vgs_nm3 vds_nm3
++ @m4[gm] @m4[ids] @m4[vth] vgs_nm4 vds_nm4
++ @m5[gm] @m5[ids] @m5[vth] vgs_nm5 vds_nm5
++ @m6[gm] @m6[ids] @m6[vth] vgs_nm6 vds_nm6
++ @m7[gm] @m7[ids] @m7[vth] vgs_nm7 vds_nm7
++ @m8[gm] @m8[ids] @m8[vth] vgs_nm8 vds_nm8
++ @m9[gm] @m9[ids] @m9[vth] vgs_nm9 vds_nm9
++ @m10[gm] @m10[ids] @m10[vth] vgs_nm10 vds_nm10
+
+
+ac dec 10 1 100G
+run
+set units=degrees
+set wr_vecnames
+option numdgt=7
+wrdata {{design_path}}/ac.csv v(Voutp)-v(Voutn)
+
+
+op
+wrdata {{design_path}}/dc.csv i(V0)
+quit
+.endc
+
+.end
\ No newline at end of file
diff --git a/eval_engines/ngspice/ngspice_inputs/spice_models/65nm_bulk.txt b/eval_engines/ngspice/ngspice_inputs/spice_models/65nm_bulk.txt
new file mode 100644
index 0000000..2a0b128
--- /dev/null
+++ b/eval_engines/ngspice/ngspice_inputs/spice_models/65nm_bulk.txt
@@ -0,0 +1,143 @@
+* Beta Version released on 2/22/06
+
+* PTM 65nm NMOS
+
+.model nmos nmos level = 54
+
++version = 4.0 binunit = 1 paramchk= 1 mobmod = 0
++capmod = 2 igcmod = 1 igbmod = 1 geomod = 1
++diomod = 1 rdsmod = 0 rbodymod= 1 rgatemod= 1
++permod = 1 acnqsmod= 0 trnqsmod= 0
+
++tnom = 27 toxe = 1.85e-9 toxp = 1.2e-9 toxm = 1.85e-9
++dtox = 0.65e-9 epsrox = 3.9 wint = 5e-009 lint = 5.25e-009
++ll = 0 wl = 0 lln = 1 wln = 1
++lw = 0 ww = 0 lwn = 1 wwn = 1
++lwl = 0 wwl = 0 xpart = 0 toxref = 1.85e-9
++xl = -30e-9
++vth0 = 0.423 k1 = 0.4 k2 = 0.01 k3 = 0
++k3b = 0 w0 = 2.5e-006 dvt0 = 1 dvt1 = 2
++dvt2 = -0.032 dvt0w = 0 dvt1w = 0 dvt2w = 0
++dsub = 0.1 minv = 0.05 voffl = 0 dvtp0 = 1.0e-009
++dvtp1 = 0.1 lpe0 = 0 lpeb = 0 xj = 1.96e-008
++ngate = 2e+020 ndep = 2.54e+018 nsd = 2e+020 phin = 0
++cdsc = 0.000 cdscb = 0 cdscd = 0 cit = 0
++voff = -0.13 nfactor = 1.9 eta0 = 0.0058 etab = 0
++vfb = -0.55 u0 = 0.0491 ua = 6e-010 ub = 1.2e-018
++uc = 0 vsat = 124340 a0 = 1.0 ags = 1e-020
++a1 = 0 a2 = 1.0 b0 = 0 b1 = 0
++keta = 0.04 dwg = 0 dwb = 0 pclm = 0.04
++pdiblc1 = 0.001 pdiblc2 = 0.001 pdiblcb = -0.005 drout = 0.5
++pvag = 1e-020 delta = 0.01 pscbe1 = 8.14e+008 pscbe2 = 1e-007
++fprout = 0.2 pdits = 0.08 pditsd = 0.23 pditsl = 2.3e+006
++rsh = 5 rdsw = 165 rsw = 85 rdw = 85
++rdswmin = 0 rdwmin = 0 rswmin = 0 prwg = 0
++prwb = 6.8e-011 wr = 1 alpha0 = 0.074 alpha1 = 0.005
++beta0 = 30 agidl = 0.0002 bgidl = 2.1e+009 cgidl = 0.0002
++egidl = 0.8
+
++aigbacc = 0.012 bigbacc = 0.0028 cigbacc = 0.002
++nigbacc = 1 aigbinv = 0.014 bigbinv = 0.004 cigbinv = 0.004
++eigbinv = 1.1 nigbinv = 3 aigc = 0.012 bigc = 0.0028
++cigc = 0.002 aigsd = 0.012 bigsd = 0.0028 cigsd = 0.002
++nigc = 1 poxedge = 1 pigcd = 1 ntox = 1
+
++xrcrg1 = 12 xrcrg2 = 5
++cgso = 1.5e-010 cgdo = 1.5e-010 cgbo = 2.56e-011 cgdl = 2.653e-10
++cgsl = 2.653e-10 ckappas = 0.03 ckappad = 0.03 acde = 1
++moin = 15 noff = 0.9 voffcv = 0.02
+
++kt1 = -0.11 kt1l = 0 kt2 = 0.022 ute = -1.5
++ua1 = 4.31e-009 ub1 = 7.61e-018 uc1 = -5.6e-011 prt = 0
++at = 33000
+
++fnoimod = 1 tnoimod = 0
+
++jss = 0.0001 jsws = 1e-011 jswgs = 1e-010 njs = 1
++ijthsfwd= 0.01 ijthsrev= 0.001 bvs = 10 xjbvs = 1
++jsd = 0.0001 jswd = 1e-011 jswgd = 1e-010 njd = 1
++ijthdfwd= 0.01 ijthdrev= 0.001 bvd = 10 xjbvd = 1
++pbs = 1 cjs = 0.0005 mjs = 0.5 pbsws = 1
++cjsws = 5e-010 mjsws = 0.33 pbswgs = 1 cjswgs = 3e-010
++mjswgs = 0.33 pbd = 1 cjd = 0.0005 mjd = 0.5
++pbswd = 1 cjswd = 5e-010 mjswd = 0.33 pbswgd = 1
++cjswgd = 5e-010 mjswgd = 0.33 tpb = 0.005 tcj = 0.001
++tpbsw = 0.005 tcjsw = 0.001 tpbswg = 0.005 tcjswg = 0.001
++xtis = 3 xtid = 3
+
++dmcg = 0e-006 dmci = 0e-006 dmdg = 0e-006 dmcgt = 0e-007
++dwj = 0.0e-008 xgw = 0e-007 xgl = 0e-008
+
++rshg = 0.4 gbmin = 1e-010 rbpb = 5 rbpd = 15
++rbps = 15 rbdb = 15 rbsb = 15 ngcon = 1
+
+* PTM 65nm PMOS
+
+.model pmos pmos level = 54
+
++version = 4.0 binunit = 1 paramchk= 1 mobmod = 0
++capmod = 2 igcmod = 1 igbmod = 1 geomod = 1
++diomod = 1 rdsmod = 0 rbodymod= 1 rgatemod= 1
++permod = 1 acnqsmod= 0 trnqsmod= 0
+
++tnom = 27 toxe = 1.95e-009 toxp = 1.2e-009 toxm = 1.95e-009
++dtox = 0.75e-9 epsrox = 3.9 wint = 5e-009 lint = 5.25e-009
++ll = 0 wl = 0 lln = 1 wln = 1
++lw = 0 ww = 0 lwn = 1 wwn = 1
++lwl = 0 wwl = 0 xpart = 0 toxref = 1.95e-009
++xl = -30e-9
++vth0 = -0.365 k1 = 0.4 k2 = -0.01 k3 = 0
++k3b = 0 w0 = 2.5e-006 dvt0 = 1 dvt1 = 2
++dvt2 = -0.032 dvt0w = 0 dvt1w = 0 dvt2w = 0
++dsub = 0.1 minv = 0.05 voffl = 0 dvtp0 = 1e-009
++dvtp1 = 0.05 lpe0 = 0 lpeb = 0 xj = 1.96e-008
++ngate = 2e+020 ndep = 1.87e+018 nsd = 2e+020 phin = 0
++cdsc = 0.000 cdscb = 0 cdscd = 0 cit = 0
++voff = -0.126 nfactor = 1.9 eta0 = 0.0058 etab = 0
++vfb = 0.55 u0 = 0.00574 ua = 2.0e-009 ub = 0.5e-018
++uc = 0 vsat = 70000 a0 = 1.0 ags = 1e-020
++a1 = 0 a2 = 1 b0 = -1e-020 b1 = 0
++keta = -0.047 dwg = 0 dwb = 0 pclm = 0.12
++pdiblc1 = 0.001 pdiblc2 = 0.001 pdiblcb = 3.4e-008 drout = 0.56
++pvag = 1e-020 delta = 0.01 pscbe1 = 8.14e+008 pscbe2 = 9.58e-007
++fprout = 0.2 pdits = 0.08 pditsd = 0.23 pditsl = 2.3e+006
++rsh = 5 rdsw = 165 rsw = 85 rdw = 85
++rdswmin = 0 rdwmin = 0 rswmin = 0 prwg = 3.22e-008
++prwb = 6.8e-011 wr = 1 alpha0 = 0.074 alpha1 = 0.005
++beta0 = 30 agidl = 0.0002 bgidl = 2.1e+009 cgidl = 0.0002
++egidl = 0.8
+
++aigbacc = 0.012 bigbacc = 0.0028 cigbacc = 0.002
++nigbacc = 1 aigbinv = 0.014 bigbinv = 0.004 cigbinv = 0.004
++eigbinv = 1.1 nigbinv = 3 aigc = 0.69 bigc = 0.0012
++cigc = 0.0008 aigsd = 0.0087 bigsd = 0.0012 cigsd = 0.0008
++nigc = 1 poxedge = 1 pigcd = 1 ntox = 1
+
++xrcrg1 = 12 xrcrg2 = 5
++cgso = 1.5e-010 cgdo = 1.5e-010 cgbo = 2.56e-011 cgdl = 2.653e-10
++cgsl = 2.653e-10 ckappas = 0.03 ckappad = 0.03 acde = 1
++moin = 15 noff = 0.9 voffcv = 0.02
+
++kt1 = -0.11 kt1l = 0 kt2 = 0.022 ute = -1.5
++ua1 = 4.31e-009 ub1 = 7.61e-018 uc1 = -5.6e-011 prt = 0
++at = 33000
+
++fnoimod = 1 tnoimod = 0
+
++jss = 0.0001 jsws = 1e-011 jswgs = 1e-010 njs = 1
++ijthsfwd= 0.01 ijthsrev= 0.001 bvs = 10 xjbvs = 1
++jsd = 0.0001 jswd = 1e-011 jswgd = 1e-010 njd = 1
++ijthdfwd= 0.01 ijthdrev= 0.001 bvd = 10 xjbvd = 1
++pbs = 1 cjs = 0.0005 mjs = 0.5 pbsws = 1
++cjsws = 5e-010 mjsws = 0.33 pbswgs = 1 cjswgs = 3e-010
++mjswgs = 0.33 pbd = 1 cjd = 0.0005 mjd = 0.5
++pbswd = 1 cjswd = 5e-010 mjswd = 0.33 pbswgd = 1
++cjswgd = 5e-010 mjswgd = 0.33 tpb = 0.005 tcj = 0.001
++tpbsw = 0.005 tcjsw = 0.001 tpbswg = 0.005 tcjswg = 0.001
++xtis = 3 xtid = 3
+
++dmcg = 0e-006 dmci = 0e-006 dmdg = 0e-006 dmcgt = 0e-007
++dwj = 0.0e-008 xgw = 0e-007 xgl = 0e-008
+
++rshg = 0.4 gbmin = 1e-010 rbpb = 5 rbpd = 15
++rbps = 15 rbdb = 15 rbsb = 15 ngcon = 1
diff --git a/eval_engines/ngspice/ngspice_inputs/yaml_files/ledro_d_fc.yaml b/eval_engines/ngspice/ngspice_inputs/yaml_files/ledro_d_fc.yaml
new file mode 100644
index 0000000..e88236f
--- /dev/null
+++ b/eval_engines/ngspice/ngspice_inputs/yaml_files/ledro_d_fc.yaml
@@ -0,0 +1,33 @@
+database_dir: "Autockt/checkpoint/db/ledro_d_fc_ngspice"
+dsn_netlist: "eval_engines/ngspice/ngspice_inputs/netlist/ledro_d_fc.cir"
+num_process: 1
+
+params:
+ nB1: !!python/tuple [1, 7, 7]
+ nB2: !!python/tuple [1, 7, 7]
+ nB3: !!python/tuple [1, 7, 7]
+ nB4: !!python/tuple [1, 7, 7]
+ nB5: !!python/tuple [1, 7, 7]
+ nB6: !!python/tuple [1, 7, 7]
+ nA1: !!python/tuple [!!float 10, !!float 990, 400]
+ nA2: !!python/tuple [!!float 10, !!float 990, 400]
+ nA3: !!python/tuple [!!float 10, !!float 990, 400]
+ nA4: !!python/tuple [!!float 10, !!float 990, 400]
+ nA5: !!python/tuple [!!float 10, !!float 990, 400]
+ nA6: !!python/tuple [!!float 10, !!float 990, 400]
+ vbiasp1: !!python/tuple [!!float 1.0, !!float 8.0, 20]
+ vbiasp2: !!python/tuple [!!float 1.0, !!float 8.0, 20]
+ vbiasn0: !!python/tuple [!!float 1.0, !!float 8.0, 20]
+ vbiasn1: !!python/tuple [!!float 1.0, !!float 8.0, 20]
+ vbiasn2: !!python/tuple [!!float 1.0, !!float 8.0, 20]
+
+
+#Every specification output will be normalized in the order of (gain, ibias, pm, ugbw)
+normalize: !!python/tuple [3300, !!float 10e-6, 70, !!float 20.0e+6]
+
+#during training, at reset a specification will be chosen at random between [min,max,#]
+target_specs:
+ gain_min: !!python/tuple [1,3300]
+ ugbw_min: !!python/tuple [!!float 1.0e+6, !!float 20.0e+6]
+ phm_min: !!python/tuple [70,70.0000001]
+ ibias_max: !!python/tuple [1e-6, 10e-6]
diff --git a/eval_engines/ngspice/ngspice_inputs/yaml_files/ledro_d_fc45.yaml b/eval_engines/ngspice/ngspice_inputs/yaml_files/ledro_d_fc45.yaml
new file mode 100644
index 0000000..4db441c
--- /dev/null
+++ b/eval_engines/ngspice/ngspice_inputs/yaml_files/ledro_d_fc45.yaml
@@ -0,0 +1,40 @@
+database_dir: "Autockt/checkpoint/db/ledro_d_fc_ngspice"
+dsn_netlist: "eval_engines/ngspice/ngspice_inputs/netlist/ledro_d_fc45.cir"
+num_process: 1
+
+params:
+ mp1: !!python/tuple [1, 100, 100]
+ mp2: !!python/tuple [1, 100, 100]
+ mp3: !!python/tuple [1, 100, 100]
+ mp4: !!python/tuple [1, 100, 100]
+ mp5: !!python/tuple [1, 100, 100]
+ mp6: !!python/tuple [1, 100, 100]
+ wp1: !!python/tuple [0.12, 200, 100]
+ wp2: !!python/tuple [0.12, 200, 100]
+ wp3: !!python/tuple [0.12, 200, 100]
+ wp4: !!python/tuple [0.12, 200, 100]
+ wp5: !!python/tuple [0.12, 200, 100]
+ wp6: !!python/tuple [0.12, 200, 100]
+ lp1: !!python/tuple [90, 2000, 100]
+ lp2: !!python/tuple [90, 2000, 100]
+ lp3: !!python/tuple [90, 2000, 100]
+ lp4: !!python/tuple [90, 2000, 100]
+ lp5: !!python/tuple [90, 2000, 100]
+ lp6: !!python/tuple [90, 2000, 100]
+ vbiasp1: !!python/tuple [!!float 0, !!float 12, 20]
+ vbiasp2: !!python/tuple [!!float 0, !!float 12, 20]
+ vbiasn0: !!python/tuple [!!float 0, !!float 12, 20]
+ vbiasn1: !!python/tuple [!!float 0, !!float 12, 20]
+ vbiasn2: !!python/tuple [!!float 0, !!float 12, 20]
+ cl: !!python/tuple [1, 50, 100]
+ cc: !!python/tuple [1, 50, 100]
+
+#Every specification output will be normalized in the order of (gain, ibias, pm, ugbw)
+normalize: !!python/tuple [3300, !!float 10e-6, 70, !!float 20.0e+6]
+
+#during training, at reset a specification will be chosen at random between [min,max,#]
+target_specs:
+ gain_min: !!python/tuple [1,3300]
+ ugbw_min: !!python/tuple [!!float 1.0e+6, !!float 20.0e+6]
+ phm_min: !!python/tuple [70,70.0000001]
+ ibias_max: !!python/tuple [1e-6, 10e-6]
diff --git a/eval_engines/ngspice/ngspice_inputs/yaml_files/zhenxin_s_fc.yaml b/eval_engines/ngspice/ngspice_inputs/yaml_files/zhenxin_s_fc.yaml
new file mode 100644
index 0000000..00013cb
--- /dev/null
+++ b/eval_engines/ngspice/ngspice_inputs/yaml_files/zhenxin_s_fc.yaml
@@ -0,0 +1,26 @@
+database_dir: "Autockt/checkpoint/db/Zhenxin_S_FC_ngspice"
+dsn_netlist: "eval_engines/ngspice/ngspice_inputs/netlist/Zhenxin_S_FC.cir"
+num_process: 1
+
+params:
+ w_m12: !!python/tuple [1, 100, 100]
+ w_m3: !!python/tuple [1, 100, 100]
+ w_m45: !!python/tuple [1, 100, 100]
+ w_m67: !!python/tuple [1, 100, 100]
+ w_m89: !!python/tuple [1, 100, 100]
+ w_m1011: !!python/tuple [1, 100, 100]
+ vbp1: !!python/tuple [!!float 0, !!float 12, 20]
+ vbp2: !!python/tuple [!!float 0, !!float 12, 20]
+ vbn1: !!python/tuple [!!float 0, !!float 12, 20]
+ vbn2: !!python/tuple [!!float 0, !!float 12, 20]
+ cc: !!python/tuple [1, 50, 100]
+
+#Every specification output will be normalized in the order of (gain, ibias, pm, ugbw)
+normalize: !!python/tuple [900, !!float 10e-3, 60, !!float 5.0e+6]
+
+#during training, at reset a specification will be chosen at random between [min,max,#]
+target_specs:
+ gain_min: !!python/tuple [800,1000]
+ ugbw_min: !!python/tuple [!!float 1.0e+6, !!float 5.0e+6]
+ phm_min: !!python/tuple [60,60.0000001]
+ ibias_max: !!python/tuple [1e-3, 10e-3]
diff --git a/eval_engines/ngspice/ngspice_wrapper.py b/eval_engines/ngspice/ngspice_wrapper.py
index 8fc081a..f9a852a 100644
--- a/eval_engines/ngspice/ngspice_wrapper.py
+++ b/eval_engines/ngspice/ngspice_wrapper.py
@@ -11,8 +11,13 @@
import pprint
import yaml
import IPython
+from jinja2 import Template
+import shutil
+import datetime
+
debug = False
+
class NgSpiceWrapper(object):
BASE_TMP_DIR = os.path.abspath("/tmp/ckt_da")
@@ -23,11 +28,19 @@ def __init__(self, num_process, yaml_path, path, root_dir=None):
else:
self.root_dir = root_dir
- with open(yaml_path, 'r') as f:
+ with open(yaml_path, "r") as f:
yaml_data = yaml.load(f)
- design_netlist = yaml_data['dsn_netlist']
- design_netlist = path+'/'+design_netlist
-
+ design_netlist = yaml_data["dsn_netlist"]
+ design_netlist = path + "/" + design_netlist
+ if not os.path.isfile(design_netlist):
+ raise FileNotFoundError(
+ "Design netlist file does not exist: %s" % design_netlist
+ )
+ self.design_netlist = design_netlist
+ with open(design_netlist, "r") as f:
+ self.netlist_str = f.read()
+ self.design_template = Template(self.netlist_str)
+
_, dsg_netlist_fname = os.path.split(design_netlist)
self.base_design_name = os.path.splitext(dsg_netlist_fname)[0]
self.num_process = num_process
@@ -36,26 +49,31 @@ def __init__(self, num_process, yaml_path, path, root_dir=None):
os.makedirs(self.root_dir, exist_ok=True)
os.makedirs(self.gen_dir, exist_ok=True)
- raw_file = open(design_netlist, 'r')
+ raw_file = open(design_netlist, "r")
self.tmp_lines = raw_file.readlines()
raw_file.close()
def get_design_name(self, state):
fname = self.base_design_name
+ # fname += str(datetime.datetime.now().timestamp())
for value in state.values():
- fname += "_" + str(value)
+ # fname += "_" + str(value)
+ fname += "_" + str(round(value, 3))
+
return fname
def create_design(self, state, new_fname):
- design_folder = os.path.join(self.gen_dir, new_fname)+str(random.randint(0,10000))
+ design_folder = os.path.join(self.gen_dir, new_fname) + str(
+ random.randint(0, 10000)
+ )
os.makedirs(design_folder, exist_ok=True)
- fpath = os.path.join(design_folder, new_fname + '.cir')
+ fpath = os.path.join(design_folder, new_fname + ".cir")
lines = copy.deepcopy(self.tmp_lines)
for line_num, line in enumerate(lines):
- if '.include' in line:
- regex = re.compile("\.include\s*\"(.*?)\"")
+ if ".include" in line:
+ regex = re.compile('\.include\s*"(.*?)"')
found = regex.search(line)
if found:
# current_fpath = os.path.realpath(__file__)
@@ -63,44 +81,47 @@ def create_design(self, state, new_fname):
# parent_path = os.path.abspath(os.path.join(parent_path, os.pardir))
# path_to_model = os.path.join(parent_path, 'spice_models/45nm_bulk.txt')
# lines[line_num] = lines[line_num].replace(found.group(1), path_to_model)
- pass # do not change the model path
- if '.param' in line:
+ pass # do not change the model path
+ if ".param" in line:
for key, value in state.items():
regex = re.compile("%s=(\S+)" % (key))
found = regex.search(line)
if found:
new_replacement = "%s=%s" % (key, str(value))
- lines[line_num] = lines[line_num].replace(found.group(0), new_replacement)
- if 'wrdata' in line:
+ lines[line_num] = lines[line_num].replace(
+ found.group(0), new_replacement
+ )
+ if "wrdata" in line:
regex = re.compile("wrdata\s*(\w+\.\w+)\s*")
found = regex.search(line)
if found:
replacement = os.path.join(design_folder, found.group(1))
- lines[line_num] = lines[line_num].replace(found.group(1), replacement)
+ lines[line_num] = lines[line_num].replace(
+ found.group(1), replacement
+ )
- with open(fpath, 'w') as f:
+ with open(fpath, "w") as f:
f.writelines(lines)
f.close()
return design_folder, fpath
def simulate(self, fpath):
- info = 0 # this means no error occurred
- command = "ngspice -b %s >/dev/null 2>&1" %fpath
+ info = 0 # this means no error occurred
+ command = "ngspice -b %s >/dev/null 2>&1" % fpath
exit_code = os.system(command)
if debug:
print(command)
print(fpath)
- if (exit_code % 256):
- # raise RuntimeError('program {} failed!'.format(command))
- info = 1 # this means an error has occurred
+ if exit_code % 256:
+ # raise RuntimeError('program {} failed!'.format(command))
+ info = 1 # this means an error has occurred
return info
-
def create_design_and_simulate(self, state, dsn_name=None, verbose=False):
if debug:
- print('state', state)
- print('verbose', verbose)
+ print("state", state)
+ print("verbose", verbose)
if dsn_name == None:
dsn_name = self.get_design_name(state)
else:
@@ -110,9 +131,9 @@ def create_design_and_simulate(self, state, dsn_name=None, verbose=False):
design_folder, fpath = self.create_design(state, dsn_name)
info = self.simulate(fpath)
specs = self.translate_result(design_folder)
+ shutil.rmtree(design_folder) # clean up
return state, specs, info
-
def run(self, states, design_names=None, verbose=False):
"""
@@ -123,7 +144,10 @@ def run(self, states, design_names=None, verbose=False):
results = [(state: dict(param_kwds, param_value), specs: dict(spec_kwds, spec_value), info: int)]
"""
pool = ThreadPool(processes=self.num_process)
- arg_list = [(state, dsn_name, verbose) for (state, dsn_name)in zip(states, design_names)]
+ arg_list = [
+ (state, dsn_name, verbose)
+ for (state, dsn_name) in zip(states, design_names)
+ ]
specs = pool.starmap(self.create_design_and_simulate, arg_list)
pool.close()
return specs
diff --git a/examples/LEDRO_D_FC/convert.py b/examples/LEDRO_D_FC/convert.py
new file mode 100644
index 0000000..9357593
--- /dev/null
+++ b/examples/LEDRO_D_FC/convert.py
@@ -0,0 +1,38 @@
+from jinja2 import Template
+
+# Read the netlist template text (the path is relative to the working directory)
+with open("ledro_d_fc.cir") as f:
+    data = f.read()
+
+
+# Compile the netlist text into a Jinja2 Template object
+template = Template(data)
+# Example values for the template placeholders
+state = dict({
+    "nA1": 7.45e-08,
+    "nB1": 6,
+    "nA2": 1.4e-07,
+    "nB2": 2,
+    "nA3": 3.75e-08,
+    "nB3": 3,
+    "nA4": 3.04e-07,
+    "nB4": 3,
+    "nA5": 3.72e-08,
+    "nB5": 4,
+    "nA6": 1.24e-07,
+    "nB6": 2,
+    "vbiasp1": 0.659,
+    "vbiasp2": 0.408,
+    "vbiasn0": 0.0525,
+    "vbiasn1": 0.016,
+    "vbiasn2": 0.352,
+    "vcm": 0.4,
+    "vdd": 0.8,
+    "tempc": 27,
+    # folder substituted into the netlist's wrdata output paths
+    "design_path": "/tmp"
+})
+# Render the template, filling its {{...}} placeholders from the example state
+output = template.render(state)
+# Emit the rendered netlist on stdout
+print(output)
\ No newline at end of file
diff --git a/examples/LEDRO_D_FC/ledro_d_fc.cir b/examples/LEDRO_D_FC/ledro_d_fc.cir
new file mode 100644
index 0000000..3d10624
--- /dev/null
+++ b/examples/LEDRO_D_FC/ledro_d_fc.cir
@@ -0,0 +1,109 @@
+*fully_differential_folded_cascode.css (ledro)
+
+.include /home/pham/shared_files/ngspice/Modelcards/PTM-MG/lstp/7nfet.pm
+.include /home/pham/shared_files/ngspice/Modelcards/PTM-MG/lstp/7pfet.pm
+
+* Parameters
+.param tempc=27.0
+.param nA1={{nA1}} nB1={{nB1}}
+.param nA2={{nA2}} nB2={{nB2}}
+.param nA3={{nA3}} nB3={{nB3}}
+.param nA4={{nA4}} nB4={{nB4}}
+.param nA5={{nA5}} nB5={{nB5}}
+.param nA6={{nA6}} nB6={{nB6}}
+.param vdd=0.8 vcm=0.4 vbiasp1={{vbiasp1}} vbiasp2={{vbiasp2}}
+.param vbiasn0={{vbiasn0}} vbiasn1={{vbiasn1}} vbiasn2={{vbiasn2}}
+
+NM6 Voutp Vbiasp2 net23 vdd pfet L={nA1} NFIN={nB1}
+NM5 Voutn Vbiasp2 net24 vdd pfet L={nA1} NFIN={nB1}
+NM2 net23 Vbiasp1 vdd vdd pfet L={nA2} NFIN={nB2}
+NM1 net24 Vbiasp1 vdd vdd pfet L={nA2} NFIN={nB2}
+NM8 Voutp Vbiasn2 net27 0 nfet L={nA3} NFIN={nB3}
+NM7 Voutn Vbiasn2 net25 0 nfet L={nA3} NFIN={nB3}
+NM3 net24 Vinp net13 0 nfet L={nA4} NFIN={nB4}
+NM0 net23 Vinn net13 0 nfet L={nA4} NFIN={nB4}
+NM10 net27 Vbiasn1 0 0 nfet L={nA5} NFIN={nB5}
+NM9 net25 Vbiasn1 0 0 nfet L={nA5} NFIN={nB5}
+NM4 net13 Vbiasn0 0 0 nfet L={nA6} NFIN={nB6}
+
+* Voltage sources
+* VS gnd 0 DC 0
+V0 vdd 0 DC {vdd}
+V2 in 0 DC 0 AC 1
+E1 Vinp cm in 0 0.5
+E0 Vinn cm in 0 -0.5
+V1 cm 0 DC {vcm}
+VP1 Vbiasp1 0 DC {vbiasp1}
+VP2 Vbiasp2 0 DC {vbiasp2}
+VN Vbiasn0 0 DC {vbiasn0}
+VN1 Vbiasn1 0 DC {vbiasn1}
+VN2 Vbiasn2 0 DC {vbiasn2}
+
+
+.control
+op
+pre_osdi /home/pham/shared_files/ngspice/osdilibs/bsimcmg.osdi
+set xbrushwidth=3
+set filetype=ascii
+run
+
+let vgs_nm0 = v(Vinn) - v(net13)
+let vds_nm0 = v(net23) - v(net13)
+
+let vgs_nm1 = v(Vbiasp1) - v(vdd)
+let vds_nm1 = v(net24) - v(vdd)
+
+let vgs_nm2 = v(Vbiasp1) - v(vdd)
+let vds_nm2 = v(net23) - v(vdd)
+
+let vgs_nm3 = v(Vinp) - v(net13)
+let vds_nm3 = v(net24) - v(net13)
+
+let vgs_nm4 = v(Vbiasn0)
+let vds_nm4 = v(net13)
+
+let vgs_nm5 = v(Vbiasp2) - v(net24)
+let vds_nm5 = v(Voutn) - v(net24)
+
+let vgs_nm6 = v(Vbiasp2) - v(net23)
+let vds_nm6 = v(Voutp) - v(net23)
+
+let vgs_nm7 = v(Vbiasn2) - v(net25)
+let vds_nm7 = v(Voutn) - v(net25)
+
+let vgs_nm8 = v(Vbiasn2) - v(net27)
+let vds_nm8 = v(Voutp) - v(net27)
+
+let vgs_nm9 = v(Vbiasn1)
+let vds_nm9 = v(net25)
+
+let vgs_nm10 = v(Vbiasn1)
+let vds_nm10 = v(net27)
+
+write output.log I(V0) @nm0[gm] @nm0[ids] @nm0[vth] vgs_nm0 vds_nm0
++ @nm1[gm] @nm1[ids] @nm1[vth] vgs_nm1 vds_nm1
++ @nm2[gm] @nm2[ids] @nm2[vth] vgs_nm2 vds_nm2
++ @nm3[gm] @nm3[ids] @nm3[vth] vgs_nm3 vds_nm3
++ @nm4[gm] @nm4[ids] @nm4[vth] vgs_nm4 vds_nm4
++ @nm5[gm] @nm5[ids] @nm5[vth] vgs_nm5 vds_nm5
++ @nm6[gm] @nm6[ids] @nm6[vth] vgs_nm6 vds_nm6
++ @nm7[gm] @nm7[ids] @nm7[vth] vgs_nm7 vds_nm7
++ @nm8[gm] @nm8[ids] @nm8[vth] vgs_nm8 vds_nm8
++ @nm9[gm] @nm9[ids] @nm9[vth] vgs_nm9 vds_nm9
++ @nm10[gm] @nm10[ids] @nm10[vth] vgs_nm10 vds_nm10
+
+
+ac dec 10 1 100G
+run
+set units=degrees
+set wr_vecnames
+option numdgt=7
+wrdata {{design_path}}/ac.csv v(Voutp)-v(Voutn)
+
+
+op
+wrdata {{design_path}}/dc.csv i(V0)
+quit
+.endc
+
+.end
\ No newline at end of file
diff --git a/examples/LEDRO_D_FC_45/action_normalizer.py b/examples/LEDRO_D_FC_45/action_normalizer.py
new file mode 100644
index 0000000..3ed6bb0
--- /dev/null
+++ b/examples/LEDRO_D_FC_45/action_normalizer.py
@@ -0,0 +1,89 @@
+import numpy as np
+from gymnasium import spaces
+
+class ActionNormalizer():
+    """Rescale and relocate actions between the range (-1, 1) and (low, high)."""
+    def __init__(self, action_space_low, action_space_high):
+        # Lower and upper bounds of the target (low, high) range.
+        self.action_space_low = action_space_low
+        self.action_space_high = action_space_high
+
+    def action(self, action: np.ndarray) -> np.ndarray:
+        """Change the range (-1, 1) to (low, high)."""
+        low = self.action_space_low
+        high = self.action_space_high
+        # Half-width and midpoint of the [low, high] interval.
+        scale_factor = (high - low) / 2
+        reloc_factor = high - scale_factor
+        # Affine map, then clip so out-of-range inputs stay within the bounds.
+        action = action * scale_factor + reloc_factor
+        action = np.clip(action, low, high)
+
+        return action
+
+    def reverse_action(self, action: np.ndarray) -> np.ndarray:
+        """Change the range (low, high) to (-1, 1)."""
+        low = self.action_space_low
+        high = self.action_space_high
+        # Half-width and midpoint of the [low, high] interval.
+        scale_factor = (high - low) / 2
+        reloc_factor = high - scale_factor
+        # Inverse of the affine map used by action(), clipped to [-1, 1].
+        action = (action - reloc_factor) / scale_factor
+        action = np.clip(action, -1.0, 1.0)
+
+        return action
+
+
+action_space = spaces.Box(low=-1, high=1, shape=(25, ), dtype=np.float64)
+# print (action_space.sample())
+
+# L: Rationale: start at ~2× technology minimum to reduce short-channel effects and improve matching.
+action_space_low = np.array(
+ [
+ 0.12, 90, 1,
+ 0.12, 90, 1,
+ 0.12, 90, 1,
+ 0.12, 90, 1,
+ 0.12, 90, 1,
+ 0.12, 90, 1,
+ 0.1,
+ 0.1,
+ 0.1,
+ 0.1,
+ 0.1,
+
+1,
+1
+
+ ]
+)
+
+action_space_high = np.array(
+ [
+ 200, 2000, 100,
+ 200, 2000, 100,
+ 200, 2000, 100,
+ 200, 2000, 100,
+ 200, 2000, 100,
+ 200, 2000, 100,
+ 1.2,
+ 1.2,
+ 1.2,
+ 1.2,
+ 1.2,
+
+50,
+50
+
+ ]
+)
+action = ActionNormalizer(action_space_low=action_space_low, action_space_high = action_space_high).action(action_space.sample()) # map a sample from the [-1, 1] box onto the [low, high] bounds
+action = action.astype(object)  # object dtype lets the int values assigned below stay Python ints
+
+print ("action: ", action)
+
+for idx in [2, 2+3, 5+3, 8+3, 11+3, 14+3, -1, -2]:
+ action[idx] = int(action[idx])
+
+print ("action: ", action)
\ No newline at end of file
diff --git a/examples/LEDRO_D_FC_45/ledro_d_fc45.cir b/examples/LEDRO_D_FC_45/ledro_d_fc45.cir
new file mode 100644
index 0000000..a14b2c5
--- /dev/null
+++ b/examples/LEDRO_D_FC_45/ledro_d_fc45.cir
@@ -0,0 +1,107 @@
+*fully_differential_folded_cascode.css (ledro)
+
+.include "/home/pham/code/analog-ml/AutoCkt/eval_engines/ngspice/ngspice_inputs/spice_models/45nm_bulk.txt"
+
+* Parameters
+.param tempc=27.0
+.param wp1=0.5u lp1=90n mp1={{mp1}}
+.param wp2=0.5u lp2=90n mp2={{mp2}}
+.param wp3=0.5u lp3=90n mp3={{mp3}}
+.param wp4=0.5u lp4=90n mp4={{mp4}}
+.param wp5=0.5u lp5=90n mp5={{mp5}}
+.param wp6=0.5u lp6=90n mp6={{mp6}}
+.param vdd=1.2 vcm=0.6 vbiasp1={{vbiasp1}} vbiasp2={{vbiasp2}}
+.param vbiasn0={{vbiasn0}} vbiasn1={{vbiasn1}} vbiasn2={{vbiasn2}}
+
+M6 Voutp Vbiasp2 net23 vdd pmos W={wp1} L={lp1} m={mp1}
+M5 Voutn Vbiasp2 net24 vdd pmos W={wp1} L={lp1} m={mp1}
+M2 net23 Vbiasp1 vdd vdd pmos W={wp2} L={lp2} m={mp2}
+M1 net24 Vbiasp1 vdd vdd pmos W={wp2} L={lp2} m={mp2}
+M8 Voutp Vbiasn2 net27 0 nmos W={wp3} L={lp3} m={mp3}
+M7 Voutn Vbiasn2 net25 0 nmos W={wp3} L={lp3} m={mp3}
+M3 net24 Vinp net13 0 nmos W={wp4} L={lp4} m={mp4}
+M0 net23 Vinn net13 0 nmos W={wp4} L={lp4} m={mp4}
+M10 net27 Vbiasn1 0 0 nmos W={wp5} L={lp5} m={mp5}
+M9 net25 Vbiasn1 0 0 nmos W={wp5} L={lp5} m={mp5}
+M4 net13 Vbiasn0 0 0 nmos W={wp6} L={lp6} m={mp6}
+
+* Voltage sources
+* VS gnd 0 DC 0
+V0 vdd 0 DC {vdd}
+V2 in 0 DC 0 AC 1
+E1 Vinp cm in 0 0.5
+E0 Vinn cm in 0 -0.5
+V1 cm 0 DC {vcm}
+VP1 Vbiasp1 0 DC {vbiasp1}
+VP2 Vbiasp2 0 DC {vbiasp2}
+VN Vbiasn0 0 DC {vbiasn0}
+VN1 Vbiasn1 0 DC {vbiasn1}
+VN2 Vbiasn2 0 DC {vbiasn2}
+
+
+.control
+op
+set xbrushwidth=3
+set filetype=ascii
+run
+
+let vgs_nm0 = v(Vinn) - v(net13)
+let vds_nm0 = v(net23) - v(net13)
+
+let vgs_nm1 = v(Vbiasp1) - v(vdd)
+let vds_nm1 = v(net24) - v(vdd)
+
+let vgs_nm2 = v(Vbiasp1) - v(vdd)
+let vds_nm2 = v(net23) - v(vdd)
+
+let vgs_nm3 = v(Vinp) - v(net13)
+let vds_nm3 = v(net24) - v(net13)
+
+let vgs_nm4 = v(Vbiasn0)
+let vds_nm4 = v(net13)
+
+let vgs_nm5 = v(Vbiasp2) - v(net24)
+let vds_nm5 = v(Voutn) - v(net24)
+
+let vgs_nm6 = v(Vbiasp2) - v(net23)
+let vds_nm6 = v(Voutp) - v(net23)
+
+let vgs_nm7 = v(Vbiasn2) - v(net25)
+let vds_nm7 = v(Voutn) - v(net25)
+
+let vgs_nm8 = v(Vbiasn2) - v(net27)
+let vds_nm8 = v(Voutp) - v(net27)
+
+let vgs_nm9 = v(Vbiasn1)
+let vds_nm9 = v(net25)
+
+let vgs_nm10 = v(Vbiasn1)
+let vds_nm10 = v(net27)
+
+write output.log I(V0) @m0[gm] @m0[ids] @m0[vth] vgs_nm0 vds_nm0
++ @m1[gm] @m1[ids] @m1[vth] vgs_nm1 vds_nm1
++ @m2[gm] @m2[ids] @m2[vth] vgs_nm2 vds_nm2
++ @m3[gm] @m3[ids] @m3[vth] vgs_nm3 vds_nm3
++ @m4[gm] @m4[ids] @m4[vth] vgs_nm4 vds_nm4
++ @m5[gm] @m5[ids] @m5[vth] vgs_nm5 vds_nm5
++ @m6[gm] @m6[ids] @m6[vth] vgs_nm6 vds_nm6
++ @m7[gm] @m7[ids] @m7[vth] vgs_nm7 vds_nm7
++ @m8[gm] @m8[ids] @m8[vth] vgs_nm8 vds_nm8
++ @m9[gm] @m9[ids] @m9[vth] vgs_nm9 vds_nm9
++ @m10[gm] @m10[ids] @m10[vth] vgs_nm10 vds_nm10
+
+
+ac dec 10 1 100G
+run
+set units=degrees
+set wr_vecnames
+option numdgt=7
+wrdata {{design_path}}/ac.csv v(Voutp)-v(Voutn)
+
+
+op
+wrdata {{design_path}}/dc.csv i(V0)
+quit
+.endc
+
+.end
\ No newline at end of file
diff --git a/examples/ex_cartpole.py b/examples/Ray/ex_cartpole.py
similarity index 100%
rename from examples/ex_cartpole.py
rename to examples/Ray/ex_cartpole.py
diff --git a/examples/Zhenxin_S_FC/Zhenxin_S_FC.cir b/examples/Zhenxin_S_FC/Zhenxin_S_FC.cir
new file mode 100644
index 0000000..8414166
--- /dev/null
+++ b/examples/Zhenxin_S_FC/Zhenxin_S_FC.cir
@@ -0,0 +1,77 @@
+*Zhenxin_S_FC
+
+.include "/home/pham/code/analog-ml/AutoCkt/eval_engines/ngspice/ngspice_inputs/spice_models/65nm_bulk.txt"
+
+* Parameters
+.param tempc=25.0
+.param wm12=28u lm12=400n mm12=1
+.param wm3=30u lm3=400n mm3=1
+.param wm45=6u lm45=600n mm45=1
+.param wm67=6u lm67=600n mm67=1
+.param wm89=5u lm89=600n mm89=1
+.param wm1011=18u lm1011=600n mm1011=1
+
+.param vbp1=0.89
+.param vbp2=0.715
+.param vbn1=0.505
+.param vbn2=0.295
+
+
+.param vdd=1.2
+.param vcm=0.6
+
+
+M3 N004 Vbp1 Vdd Vdd pmos W={wm3} L={lm3} m={mm3}
+M4 N002 N001 Vdd Vdd pmos W={wm45} L={lm45} m={mm45}
+M5 N003 N001 Vdd Vdd pmos W={wm45} L={lm45} m={mm45}
+M7 Vout Vbp2 N003 N003 pmos W={wm67} L={lm67} m={mm67}
+M6 N001 Vbp2 N002 N002 pmos W={wm67} L={lm67} m={mm67}
+M1 N006 Vinp N004 N004 pmos W={wm12} L={lm12} m={mm12}
+M2 N005 Vinn N004 N004 pmos W={wm12} L={lm12} m={mm12}
+M8 N001 Vbn1 N006 N006 nmos W={wm89} L={lm89} m={mm89}
+M9 Vout Vbn1 N005 N005 nmos W={wm89} L={lm89} m={mm89}
+M10 N006 Vbn2 0 0 nmos W={wm1011} L={lm1011} m={mm1011}
+M11 N005 Vbn2 0 0 nmos W={wm1011} L={lm1011} m={mm1011}
+
+Ccomp N001 Vout 1p
+Cload Vout 0 1p
+
+* Voltage sources
+* VS gnd 0 DC 0
+V0 vdd 0 DC {vdd}
+V2 in 0 DC 0 AC 1
+E1 Vinp cm in 0 0.5
+E0 Vinn cm in 0 -0.5
+V1 cm 0 DC {vcm}
+VP2 Vbp1 0 DC {vbp1}
+VN Vbp2 0 DC {vbp2}
+VN1 Vbn1 0 DC {vbn1}
+VN2 Vbn2 0 DC {vbn2}
+
+
+.control
+op
+set xbrushwidth=3
+set filetype=ascii
+run
+
+
+ac dec 10 1 100G
+run
+set units=degrees
+set wr_vecnames
+option numdgt=7
+wrdata ac.csv v(Vout)
+
+
+meas ac gain_bandwidth_product_ when vdb(Vout)=0
+meas ac phase_margin find vp(Vout) when vdb(Vout)=0
+wrdata GBW_PM gain_bandwidth_product_ phase_margin
+
+
+op
+wrdata dc.csv i(V0)
+quit
+.endc
+
+.end
\ No newline at end of file
diff --git a/examples/Zhenxin_S_FC/action_normalizer.py b/examples/Zhenxin_S_FC/action_normalizer.py
new file mode 100644
index 0000000..f087d21
--- /dev/null
+++ b/examples/Zhenxin_S_FC/action_normalizer.py
@@ -0,0 +1,113 @@
+import numpy as np
+from gymnasium import spaces
+
+
+class ActionNormalizer:
+    """Rescale and relocate the actions."""
+
+    def __init__(self, action_space_low, action_space_high):
+        """
+        Initialize the ActionNormalizer with per-dimension action bounds.
+
+        Parameters:
+            action_space_low (array-like): 1-D array of per-dimension minimum action values (lower bounds).
+            action_space_high (array-like): 1-D array of per-dimension maximum action values (upper bounds).
+
+        Both must have the same shape and correspond elementwise. They are converted to float NumPy arrays
+        so that lists and tuples are accepted, and are used to map actions between (-1, 1) and [low, high].
+        """
+        self.action_space_low = np.asarray(action_space_low, dtype=float)
+        self.action_space_high = np.asarray(action_space_high, dtype=float)
+
+    def action(self, action: np.ndarray) -> np.ndarray:
+        """
+        Map an elementwise action from the canonical range (-1, 1) into the instance's per-dimension [low, high] bounds.
+
+        The input `action` is expected to be an ndarray with the same shape as the normalizer's bounds. Each element x is transformed with a linear mapping:
+            y = x * ((high - low) / 2) + (high - (high - low) / 2)
+        and then clipped to the corresponding [low, high] interval.
+
+        Parameters:
+            action (np.ndarray): Elementwise action values in (-1, 1) to be scaled.
+
+        Returns:
+            np.ndarray: Action mapped and clipped to the per-dimension [low, high] range.
+        """
+        low = self.action_space_low
+        high = self.action_space_high
+
+        scale_factor = (high - low) / 2
+        reloc_factor = high - scale_factor
+
+        action = action * scale_factor + reloc_factor
+        action = np.clip(action, low, high)
+
+        return action
+
+    def reverse_action(self, action: np.ndarray) -> np.ndarray:
+        """
+        Map an action from the environment bounds [low, high] back into the canonical (-1, 1) range.
+
+        Per-dimension inverse linear transform using this instance's action_space_low and action_space_high:
+        scale = (high - low) / 2 and offset = high - scale, then result = (action - offset) / scale.
+        The output is clipped elementwise to [-1.0, 1.0] and returned as an ndarray with the same shape as the input.
+        """
+        low = self.action_space_low
+        high = self.action_space_high
+
+        scale_factor = (high - low) / 2
+        reloc_factor = high - scale_factor
+
+        action = (action - reloc_factor) / scale_factor
+        action = np.clip(action, -1.0, 1.0)
+
+        return action
+
+
+action_space = spaces.Box(low=-1, high=1, shape=(24,), dtype=np.float64)
+# print (action_space.sample())
+# fmt: off
+action_space_low = np.array(
+ [
+ 0.13, 0.12, 1,
+ 0.13, 0.12, 1,
+ 0.13, 0.12, 1,
+ 0.13, 0.12, 1,
+ 0.13, 0.12, 1,
+ 0.13, 0.12, 1,
+ 0.1,
+ 0.1,
+ 0.1,
+ 0.1,
+ 0.1,
+ 0.1
+ ]
+)
+
+action_space_high = np.array(
+ [
+ 50, 2, 100,
+ 50, 2, 100,
+ 50, 2, 100,
+ 50, 2, 100,
+ 50, 2, 100,
+ 50, 2, 100,
+ 1.2,
+ 1.2,
+ 1.2,
+ 1.2,
+ 50,
+ 50
+ ]
+)
+# fmt: on
+
+# Cast to object dtype so the int() results assigned below stay Python ints
+# instead of being coerced back to float64 by the array.
+action = ActionNormalizer(action_space_low=action_space_low, action_space_high=action_space_high).action(action_space.sample()).astype(object)
+print("action: ", action)
+
+for idx in [2, 2 + 3, 5 + 3, 8 + 3, 11 + 3, 14 + 3, -1, -2]:
+    action[idx] = int(action[idx])
+print("action: ", action)
+print("action: ", ",".join([str(x) for x in action]))
diff --git a/examples/Zhenxin_S_FC/extract_perf.py b/examples/Zhenxin_S_FC/extract_perf.py
new file mode 100644
index 0000000..97a0e3a
--- /dev/null
+++ b/examples/Zhenxin_S_FC/extract_perf.py
@@ -0,0 +1,102 @@
+import numpy as np
+import os
+import scipy.interpolate as interp
+import scipy.optimize as sciopt
+
+debug = False
+
+
+
+class Extractor():
+    """Turn the ac.csv / dc.csv files of a simulation run into gain, ugbw, phm and ibias values."""
+    # No instance state is kept, so the former no-op __init__ is left to the default constructor.
+
+    def translate_result(self, output_path):
+        """
+        Parse the simulation output in output_path and compute the spec values.
+
+        :param output_path: directory containing ac.csv and dc.csv
+        :return
+            result: dict with keys ugbw (crossing frequency divided by 1e6),
+                gain (dB), phm (degrees) and ibias
+        """
+        freq, vout, ibias = self.parse_output(output_path)
+        gain = self.find_dc_gain(vout)
+        ugbw = self.find_ugbw(freq, vout) / 1e6
+        phm = self.find_phm(freq, vout)
+
+        spec = dict(ugbw=ugbw, gain=gain, phm=phm, ibias=ibias)
+
+        return spec
+
+    def parse_output(self, output_path):
+        """Load ac.csv and dc.csv from output_path; return (freq, complex vout, ibias)."""
+        ac_fname = os.path.join(output_path, "ac.csv")
+        dc_fname = os.path.join(output_path, "dc.csv")
+
+        if not os.path.isfile(ac_fname) or not os.path.isfile(dc_fname):
+            # Stop here with the offending directory rather than printing and
+            # letting np.genfromtxt fail on the missing file a line later.
+            raise FileNotFoundError("ac/dc file doesn't exist: %s" % output_path)
+        ac_raw_outputs = np.genfromtxt(ac_fname, skip_header=1)
+        dc_raw_outputs = np.genfromtxt(dc_fname, skip_header=1)
+        freq = ac_raw_outputs[:, 0]
+        vout_real = ac_raw_outputs[:, 1]
+        vout_imag = ac_raw_outputs[:, 2]
+        vout = vout_real + 1j * vout_imag
+        ibias = -dc_raw_outputs[1]
+        return freq, vout, ibias
+
+    def find_dc_gain(self, vout):
+        """Return the magnitude of the first vout sample in dB."""
+        # The first sample of the sweep is taken as the DC point.
+        return 20*np.log10(np.abs(vout)[0])
+
+    def find_ugbw(self, freq, vout):
+        """Return the frequency where |vout| crosses 1, or freq[0] if there is no crossing."""
+        gain = np.abs(vout)
+        ugbw, valid = self._get_best_crossing(freq, gain, val=1)
+        if valid:
+            return ugbw
+        return freq[0]
+
+    def find_phm(self, freq, vout):
+        """
+        Return the phase margin in degrees at the unity-gain crossing of |vout|.
+
+        The unwrapped phase is interpolated at the crossing frequency and shifted
+        by 180 degrees according to its sign; -180 is returned when |vout| never
+        crosses 1 inside the swept range.
+        """
+        gain = np.abs(vout)
+        phase = np.angle(vout, deg=False)
+        phase = np.unwrap(phase)  # unwrap the discontinuity
+        phase = np.rad2deg(phase)  # convert to degrees
+        phase_fun = interp.interp1d(freq, phase, kind="quadratic")
+        ugbw, valid = self._get_best_crossing(freq, gain, val=1)
+        phase_at_ugbw = phase_fun(ugbw)  # evaluate the interpolant once
+        print ("phase_fun(ugbw): ", phase_at_ugbw)
+        if not valid:
+            return -180
+        if phase_at_ugbw > 0:
+            return -180 + phase_at_ugbw
+        return 180 + phase_at_ugbw
+
+    def _get_best_crossing(self, xvec, yvec, val):
+        """Return (x, True) where the spline through (xvec, yvec) equals val, else (xvec[-1], False)."""
+        interp_fun = interp.InterpolatedUnivariateSpline(xvec, yvec)
+
+        def fzero(x):
+            return interp_fun(x) - val
+
+        xstart, xstop = xvec[0], xvec[-1]
+        try:
+            return sciopt.brentq(fzero, xstart, xstop), True
+        except ValueError:
+            # brentq needs a sign change of fzero between xstart and xstop;
+            # without one there is no bracketed crossing to report.
+            return xstop, False
+
+if __name__ == "__main__":
+ extractor = Extractor()
+ print (extractor.translate_result("."))
\ No newline at end of file
diff --git a/examples/reward_calculation.py b/examples/reward_calculation.py
new file mode 100644
index 0000000..5fa0290
--- /dev/null
+++ b/examples/reward_calculation.py
@@ -0,0 +1,79 @@
+import numpy as np
+
+cur_specs = np.array([7.20504133e03, 8.66115710e-04, 4.56972473e01, 2.69241239e07])
+ideal_specs = np.array([8.02000000e02, 1.68455518e-03, 6.00000000e01, 1.90104525e06])
+
+cur_specs = np.array([7.20504133e03, 8.66115710e-04, 6.56972473e01, 2.69241239e07])
+ideal_specs = np.array([8.02000000e02, 1.68455518e-03, 6.00000000e01, 1.90104525e06])
+
+
+def lookup(spec, goal_spec):
+    """
+    Compute per-dimension normalized deviation between current and goal specifications.
+
+    Both inputs are converted to float NumPy arrays (so lists and tuples work as well as arrays), then (spec - goal_spec) / (goal_spec + spec) is returned elementwise.
+    Positive values indicate spec > goal_spec, negative values indicate spec < goal_spec. The inputs must have compatible shapes; the result is a NumPy array with the broadcast shape.
+    """
+    spec, goal_spec = np.asarray(spec, dtype=float), np.asarray(goal_spec, dtype=float)
+    norm_spec = (spec - goal_spec) / (goal_spec + spec)
+    return norm_spec
+
+
+specs_id = ["gain_min", "ibias_max", "phm_min", "ugbw_min"]
+
+
+def reward(spec, goal_spec):
+ """
+ Compute a scalar penalty (returned as a negative reward) comparing current specs to goal specs.
+
+ Parameters:
+ spec (array-like): Current specification values (numeric sequence, same length/order as `goal_spec`).
+ goal_spec (array-like): Target specification values.
+
+ Returns:
+ float: Negative penalty value (<= 0). Larger magnitude means a larger violation of targets.
+
+ Details:
+ - Internally calls `lookup(spec, goal_spec)` to compute per-dimension normalized deviations: (spec - goal) / (spec + goal).
+ - Uses the module-level `specs_id` list to interpret each dimension. It expects each id to be one of: "ibias_max", "gain_min", "ugbw_min", "phm_min"; an AssertionError is raised otherwise.
+ - Penalty rules applied to each normalized deviation `rel_spec`:
+ - "ibias_max": penalize only when `rel_spec > 0` (i.e., current > goal).
+ - "gain_min": penalize undershoot (`rel_spec < 0`) with triple weight (3 * abs(rel_spec)).
+ - "phm_min" and "ugbw_min": penalize undershoot (`rel_spec < 0`) with weight 1 * abs(rel_spec).
+ - The function returns the negative of the accumulated penalty (so perfect or over-performing specs produce values closer to 0, while violations produce more negative values).
+ """
+ # rel_specs = self.lookup(spec, goal_spec)
+ # pos_val = []
+ # reward = 0.0
+ # for i, rel_spec in enumerate(rel_specs):
+ # if self.specs_id[i] == "ibias_max":
+ # rel_spec = rel_spec * -1.0 # /10.0
+ # if rel_spec < 0:
+ # reward += rel_spec
+ # pos_val.append(0)
+ # else:
+ # pos_val.append(1)
+
+ # return reward if reward < -0.02 else 10
+
+ norm_specs = lookup(spec, goal_spec)
+
+ # pay attention to reward calculation, this is not quite the reward function in RL
+ # but rather a penalty value for the optimization process
+ reward = 0
+ for i, rel_spec in enumerate(norm_specs):
+        # ibias_max is an upper bound: only exceeding the goal is penalized
+        # gain_min is a lower bound: undershooting the goal is penalized with triple weight
+        # phm_min and ugbw_min are lower bounds too: only undershooting the goal is penalized
+ assert specs_id[i] in ["ibias_max", "gain_min", "ugbw_min", "phm_min"]
+ if specs_id[i] == "ibias_max" and rel_spec > 0:
+ reward += np.abs(rel_spec) # /10
+ elif specs_id[i] == "gain_min" and rel_spec < 0:
+ reward += 3 * np.abs(rel_spec) # /10
+ elif specs_id[i] != "ibias_max" and rel_spec < 0:
+ reward += np.abs(rel_spec)
+ return -reward
+ # return -reward if -reward < -1.0 else 10
+
+
+print(reward(cur_specs, ideal_specs))
diff --git a/experiments/ledro_d_fc_7nm_run3/PPO_LEDRO_D_FC_0_2025-08-28_17-19-59h7o8x_d0/events.out.tfevents.1756394648.cda-server-3 b/experiments/ledro_d_fc_7nm_run3/PPO_LEDRO_D_FC_0_2025-08-28_17-19-59h7o8x_d0/events.out.tfevents.1756394648.cda-server-3
new file mode 100644
index 0000000..b679585
Binary files /dev/null and b/experiments/ledro_d_fc_7nm_run3/PPO_LEDRO_D_FC_0_2025-08-28_17-19-59h7o8x_d0/events.out.tfevents.1756394648.cda-server-3 differ
diff --git a/experiments/ledro_d_fc_7nm_run3/PPO_LEDRO_D_FC_0_2025-08-28_17-19-59h7o8x_d0/final_checkpoint/checkpoint-300 b/experiments/ledro_d_fc_7nm_run3/PPO_LEDRO_D_FC_0_2025-08-28_17-19-59h7o8x_d0/final_checkpoint/checkpoint-300
new file mode 100644
index 0000000..5391b31
Binary files /dev/null and b/experiments/ledro_d_fc_7nm_run3/PPO_LEDRO_D_FC_0_2025-08-28_17-19-59h7o8x_d0/final_checkpoint/checkpoint-300 differ
diff --git a/experiments/ledro_d_fc_7nm_run3/PPO_LEDRO_D_FC_0_2025-08-28_17-19-59h7o8x_d0/final_checkpoint/checkpoint-300.tune_metadata b/experiments/ledro_d_fc_7nm_run3/PPO_LEDRO_D_FC_0_2025-08-28_17-19-59h7o8x_d0/final_checkpoint/checkpoint-300.tune_metadata
new file mode 100644
index 0000000..7f9242d
Binary files /dev/null and b/experiments/ledro_d_fc_7nm_run3/PPO_LEDRO_D_FC_0_2025-08-28_17-19-59h7o8x_d0/final_checkpoint/checkpoint-300.tune_metadata differ
diff --git a/experiments/ledro_d_fc_7nm_run3/PPO_LEDRO_D_FC_0_2025-08-28_17-19-59h7o8x_d0/image.png b/experiments/ledro_d_fc_7nm_run3/PPO_LEDRO_D_FC_0_2025-08-28_17-19-59h7o8x_d0/image.png
new file mode 100644
index 0000000..1d017bc
Binary files /dev/null and b/experiments/ledro_d_fc_7nm_run3/PPO_LEDRO_D_FC_0_2025-08-28_17-19-59h7o8x_d0/image.png differ
diff --git a/experiments/ledro_d_fc_7nm_run3/PPO_LEDRO_D_FC_0_2025-08-28_17-19-59h7o8x_d0/params.json b/experiments/ledro_d_fc_7nm_run3/PPO_LEDRO_D_FC_0_2025-08-28_17-19-59h7o8x_d0/params.json
new file mode 100644
index 0000000..9fcb341
--- /dev/null
+++ b/experiments/ledro_d_fc_7nm_run3/PPO_LEDRO_D_FC_0_2025-08-28_17-19-59h7o8x_d0/params.json
@@ -0,0 +1,18 @@
+{
+ "env": "",
+ "env_config": {
+ "generalize": true,
+ "run_valid": false
+ },
+ "horizon": 50,
+ "model": {
+ "fcnet_hiddens": [
+ 128,
+ 128,
+ 128
+ ]
+ },
+ "num_gpus": 0,
+ "num_workers": 3,
+ "train_batch_size": 1200
+}
\ No newline at end of file
diff --git a/experiments/ledro_d_fc_7nm_run3/PPO_LEDRO_D_FC_0_2025-08-28_17-19-59h7o8x_d0/params.pkl b/experiments/ledro_d_fc_7nm_run3/PPO_LEDRO_D_FC_0_2025-08-28_17-19-59h7o8x_d0/params.pkl
new file mode 100644
index 0000000..1e12302
Binary files /dev/null and b/experiments/ledro_d_fc_7nm_run3/PPO_LEDRO_D_FC_0_2025-08-28_17-19-59h7o8x_d0/params.pkl differ
diff --git a/experiments/ledro_d_fc_7nm_run3/PPO_LEDRO_D_FC_0_2025-08-28_17-19-59h7o8x_d0/progress.csv b/experiments/ledro_d_fc_7nm_run3/PPO_LEDRO_D_FC_0_2025-08-28_17-19-59h7o8x_d0/progress.csv
new file mode 100644
index 0000000..723447d
--- /dev/null
+++ b/experiments/ledro_d_fc_7nm_run3/PPO_LEDRO_D_FC_0_2025-08-28_17-19-59h7o8x_d0/progress.csv
@@ -0,0 +1,307 @@
+experiment_id,time_since_restore,episode_len_mean,done,episode_reward_min,iterations_since_restore,episodes_total,timestamp,episode_reward_mean,pid,timesteps_this_iter,date,policy_reward_mean,time_this_iter_s,episodes_this_iter,training_iteration,time_total_s,info,timesteps_total,config,node_ip,num_metric_batches_dropped,custom_metrics,timesteps_since_restore,hostname,episode_reward_max
+7ffa6ff4607a442eb508661143530d5b,231.39491868019104,50.0,False,-146.78393839650298,1,24,1756394647,-129.08330393143353,1566858,1200,2025-08-28_17-24-07,{},231.39491868019104,24,1,231.39491868019104,"{'num_steps_sampled': 1200, 'num_steps_trained': 1200, 'default': {'policy_loss': -0.12120606005191803, 'vf_explained_var': 0.018705738708376884, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 0.20000000298023224, 'vf_loss': 4254.23876953125, 'entropy': 18.654157638549805, 'kl': 0.02240253984928131, 'total_loss': 4254.12255859375}, 'sample_time_ms': 226832.15, 'grad_time_ms': 2279.741, 'load_time_ms': 148.38, 'update_time_ms': 2016.317}",1200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},1200,cda-server-3,-99.99993258306239
+7ffa6ff4607a442eb508661143530d5b,439.889981508255,50.0,False,-146.78393839650298,2,48,1756394856,-127.32490473992193,1566858,1200,2025-08-28_17-27-36,{},208.49506282806396,24,2,439.889981508255,"{'num_steps_sampled': 2400, 'num_steps_trained': 2400, 'default': {'policy_loss': -0.12993724644184113, 'vf_explained_var': 0.11479301005601883, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 0.30000001192092896, 'vf_loss': 3653.26953125, 'entropy': 18.634702682495117, 'kl': 0.023673098534345627, 'total_loss': 3653.146728515625}, 'sample_time_ms': 217313.857, 'grad_time_ms': 1484.979, 'load_time_ms': 74.976, 'update_time_ms': 1009.617}",2400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},2400,cda-server-3,-99.54185984989468
+7ffa6ff4607a442eb508661143530d5b,661.38379073143,50.0,False,-146.78393839650298,3,72,1756395078,-127.62901381105137,1566858,1200,2025-08-28_17-31-18,{},221.49380922317505,24,3,661.38379073143,"{'num_steps_sampled': 3600, 'num_steps_trained': 3600, 'default': {'policy_loss': -0.13941305875778198, 'vf_explained_var': 0.11090646684169769, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 0.44999995827674866, 'vf_loss': 3644.2900390625, 'entropy': 18.60210418701172, 'kl': 0.02471771091222763, 'total_loss': 3644.161865234375}, 'sample_time_ms': 218474.294, 'grad_time_ms': 1219.646, 'load_time_ms': 50.629, 'update_time_ms': 673.919}",3600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},3600,cda-server-3,-99.54185984989468
+7ffa6ff4607a442eb508661143530d5b,891.4586873054504,50.0,False,-146.78393839650298,4,96,1756395308,-125.83527627708632,1566858,1200,2025-08-28_17-35-08,{},230.07489657402039,24,4,891.4586873054504,"{'num_steps_sampled': 4800, 'num_steps_trained': 4800, 'default': {'policy_loss': -0.12359528988599777, 'vf_explained_var': 0.11000010371208191, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 0.675000011920929, 'vf_loss': 3012.851806640625, 'entropy': 18.575050354003906, 'kl': 0.019558193162083626, 'total_loss': 3012.7412109375}, 'sample_time_ms': 221199.687, 'grad_time_ms': 1086.971, 'load_time_ms': 38.45, 'update_time_ms': 506.159}",4800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},4800,cda-server-3,-98.49905122783261
+7ffa6ff4607a442eb508661143530d5b,1150.402874469757,50.0,False,-146.0541023313413,5,120,1756395567,-124.10875304099744,1566858,1200,2025-08-28_17-39-27,{},258.94418716430664,24,5,1150.402874469757,"{'num_steps_sampled': 6000, 'num_steps_trained': 6000, 'default': {'policy_loss': -0.12141091376543045, 'vf_explained_var': 0.05904542654752731, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 0.675000011920929, 'vf_loss': 2893.345703125, 'entropy': 18.560523986816406, 'kl': 0.0196517501026392, 'total_loss': 2893.237548828125}, 'sample_time_ms': 228606.987, 'grad_time_ms': 1009.142, 'load_time_ms': 31.135, 'update_time_ms': 405.444}",6000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},6000,cda-server-3,-98.49905122783261
+7ffa6ff4607a442eb508661143530d5b,1408.9546167850494,50.0,False,-146.0541023313413,6,144,1756395825,-122.72888846822445,1566858,1200,2025-08-28_17-43-45,{},258.55174231529236,24,6,1408.9546167850494,"{'num_steps_sampled': 7200, 'num_steps_trained': 7200, 'default': {'policy_loss': -0.13973921537399292, 'vf_explained_var': 0.011485014110803604, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 0.675000011920929, 'vf_loss': 2471.14990234375, 'entropy': 18.532447814941406, 'kl': 0.019554639235138893, 'total_loss': 2471.023193359375}, 'sample_time_ms': 233480.97, 'grad_time_ms': 956.137, 'load_time_ms': 26.219, 'update_time_ms': 338.316}",7200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},7200,cda-server-3,-98.49905122783261
+7ffa6ff4607a442eb508661143530d5b,1674.9645681381226,50.0,False,-146.44951359018535,7,168,1756396091,-121.15475903464372,1566858,1200,2025-08-28_17-48-11,{},266.0099513530731,24,7,1674.9645681381226,"{'num_steps_sampled': 8400, 'num_steps_trained': 8400, 'default': {'policy_loss': -0.13989777863025665, 'vf_explained_var': 0.01834733597934246, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 0.675000011920929, 'vf_loss': 2349.50146484375, 'entropy': 18.50861358642578, 'kl': 0.02123822271823883, 'total_loss': 2349.376220703125}, 'sample_time_ms': 238027.997, 'grad_time_ms': 918.38, 'load_time_ms': 22.601, 'update_time_ms': 290.334}",8400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},8400,cda-server-3,-98.49905122783261
+7ffa6ff4607a442eb508661143530d5b,1941.3925409317017,50.0,False,-146.44951359018535,8,192,1756396358,-120.81588605798613,1566858,1200,2025-08-28_17-52-38,{},266.4279727935791,24,8,1941.3925409317017,"{'num_steps_sampled': 9600, 'num_steps_trained': 9600, 'default': {'policy_loss': -0.12228532880544662, 'vf_explained_var': 0.009332027286291122, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 2376.759521484375, 'entropy': 18.485597610473633, 'kl': 0.017213426530361176, 'total_loss': 2376.654541015625}, 'sample_time_ms': 241490.303, 'grad_time_ms': 890.103, 'load_time_ms': 19.985, 'update_time_ms': 254.345}",9600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},9600,cda-server-3,-88.09294395093761
+7ffa6ff4607a442eb508661143530d5b,2161.997076511383,50.0,False,-146.83873104448023,9,216,1756396578,-119.28984459236621,1566858,1200,2025-08-28_17-56-18,{},220.6045355796814,24,9,2161.997076511383,"{'num_steps_sampled': 10800, 'num_steps_trained': 10800, 'default': {'policy_loss': -0.13750998675823212, 'vf_explained_var': 0.00047000250197015703, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 1895.60546875, 'entropy': 18.46615219116211, 'kl': 0.017844107002019882, 'total_loss': 1895.48583984375}, 'sample_time_ms': 239091.05, 'grad_time_ms': 868.78, 'load_time_ms': 17.938, 'update_time_ms': 226.372}",10800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},10800,cda-server-3,-88.09294395093761
+7ffa6ff4607a442eb508661143530d5b,2368.387995481491,50.0,False,-146.83873104448023,10,240,1756396785,-118.7602112144562,1566858,1200,2025-08-28_17-59-45,{},206.39091897010803,24,10,2368.387995481491,"{'num_steps_sampled': 12000, 'num_steps_trained': 12000, 'default': {'policy_loss': -0.12456995993852615, 'vf_explained_var': 0.041680652648210526, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 1867.90771484375, 'entropy': 18.442859649658203, 'kl': 0.018277890980243683, 'total_loss': 1867.8016357421875}, 'sample_time_ms': 235750.178, 'grad_time_ms': 851.815, 'load_time_ms': 16.329, 'update_time_ms': 203.983}",12000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},12000,cda-server-3,-88.09294395093761
+7ffa6ff4607a442eb508661143530d5b,2639.416999101639,50.0,False,-146.83873104448023,11,264,1756397056,-118.20574028935748,1566858,1200,2025-08-28_18-04-16,{},271.0290036201477,24,11,2639.416999101639,"{'num_steps_sampled': 13200, 'num_steps_trained': 13200, 'default': {'policy_loss': -0.1394842118024826, 'vf_explained_var': 0.02399369142949581, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 1760.54541015625, 'entropy': 18.41582489013672, 'kl': 0.0173909030854702, 'total_loss': 1760.423583984375}, 'sample_time_ms': 240099.86, 'grad_time_ms': 692.911, 'load_time_ms': 1.659, 'update_time_ms': 2.59}",13200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},13200,cda-server-3,-88.09294395093761
+7ffa6ff4607a442eb508661143530d5b,2889.085036754608,50.0,False,-146.83873104448023,12,288,1756397305,-116.34781812997744,1566858,1200,2025-08-28_18-08-25,{},249.66803765296936,24,12,2889.085036754608,"{'num_steps_sampled': 14400, 'num_steps_trained': 14400, 'default': {'policy_loss': -0.131776362657547, 'vf_explained_var': 0.08143580704927444, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 1517.3621826171875, 'entropy': 18.392175674438477, 'kl': 0.015726102516055107, 'total_loss': 1517.24609375}, 'sample_time_ms': 244216.386, 'grad_time_ms': 693.713, 'load_time_ms': 1.664, 'update_time_ms': 2.543}",14400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},14400,cda-server-3,-95.21420483749228
+7ffa6ff4607a442eb508661143530d5b,3180.254895925522,50.0,False,-142.98441497447922,13,312,1756397596,-115.62934410428164,1566858,1200,2025-08-28_18-13-16,{},291.1698591709137,24,13,3180.254895925522,"{'num_steps_sampled': 15600, 'num_steps_trained': 15600, 'default': {'policy_loss': -0.14231985807418823, 'vf_explained_var': 0.08726880699396133, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 1588.935791015625, 'entropy': 18.384498596191406, 'kl': 0.01738560199737549, 'total_loss': 1588.8111572265625}, 'sample_time_ms': 251184.496, 'grad_time_ms': 693.321, 'load_time_ms': 1.568, 'update_time_ms': 2.557}",15600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},15600,cda-server-3,-83.97588886261303
+7ffa6ff4607a442eb508661143530d5b,3432.3409848213196,50.0,False,-143.8383056089926,14,336,1756397849,-115.66906308452896,1566858,1200,2025-08-28_18-17-29,{},252.08608889579773,24,14,3432.3409848213196,"{'num_steps_sampled': 16800, 'num_steps_trained': 16800, 'default': {'policy_loss': -0.13075391948223114, 'vf_explained_var': 0.10596006363630295, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 1400.822509765625, 'entropy': 18.35945701599121, 'kl': 0.015562936663627625, 'total_loss': 1400.7073974609375}, 'sample_time_ms': 253385.056, 'grad_time_ms': 693.955, 'load_time_ms': 1.52, 'update_time_ms': 2.523}",16800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},16800,cda-server-3,-83.97588886261303
+7ffa6ff4607a442eb508661143530d5b,3690.3307423591614,50.0,False,-143.8383056089926,15,360,1756398107,-114.85649792242968,1566858,1200,2025-08-28_18-21-47,{},257.9897575378418,24,15,3690.3307423591614,"{'num_steps_sampled': 18000, 'num_steps_trained': 18000, 'default': {'policy_loss': -0.13020434975624084, 'vf_explained_var': 0.17911416292190552, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 1270.36865234375, 'entropy': 18.33188819885254, 'kl': 0.01751522161066532, 'total_loss': 1270.256103515625}, 'sample_time_ms': 253290.003, 'grad_time_ms': 693.672, 'load_time_ms': 1.478, 'update_time_ms': 2.533}",18000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},18000,cda-server-3,-83.97588886261303
+7ffa6ff4607a442eb508661143530d5b,3912.750263929367,50.0,False,-143.8383056089926,16,384,1756398329,-114.61370286216462,1566858,1200,2025-08-28_18-25-29,{},222.4195215702057,24,16,3912.750263929367,"{'num_steps_sampled': 19200, 'num_steps_trained': 19200, 'default': {'policy_loss': -0.13605083525180817, 'vf_explained_var': 0.17312727868556976, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 1203.89111328125, 'entropy': 18.3139591217041, 'kl': 0.017916101962327957, 'total_loss': 1203.7730712890625}, 'sample_time_ms': 249676.023, 'grad_time_ms': 694.436, 'load_time_ms': 1.49, 'update_time_ms': 2.497}",19200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},19200,cda-server-3,-83.97588886261303
+7ffa6ff4607a442eb508661143530d5b,4148.901806116104,50.0,False,-143.8383056089926,17,408,1756398565,-115.1950941298017,1566858,1200,2025-08-28_18-29-25,{},236.15154218673706,24,17,4148.901806116104,"{'num_steps_sampled': 20400, 'num_steps_trained': 20400, 'default': {'policy_loss': -0.1500139832496643, 'vf_explained_var': 0.20809487998485565, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 1293.311767578125, 'entropy': 18.267717361450195, 'kl': 0.019330434501171112, 'total_loss': 1293.18115234375}, 'sample_time_ms': 246689.11, 'grad_time_ms': 695.366, 'load_time_ms': 1.557, 'update_time_ms': 2.53}",20400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},20400,cda-server-3,-98.04220398724607
+7ffa6ff4607a442eb508661143530d5b,4419.96648812294,50.0,False,-145.8632685496317,18,432,1756398836,-114.82608095291198,1566858,1200,2025-08-28_18-33-56,{},271.06468200683594,24,18,4419.96648812294,"{'num_steps_sampled': 21600, 'num_steps_trained': 21600, 'default': {'policy_loss': -0.1369973123073578, 'vf_explained_var': 0.21514759957790375, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 1099.616943359375, 'entropy': 18.250120162963867, 'kl': 0.01694124937057495, 'total_loss': 1099.4969482421875}, 'sample_time_ms': 247152.753, 'grad_time_ms': 695.384, 'load_time_ms': 1.549, 'update_time_ms': 2.557}",21600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},21600,cda-server-3,-94.07099127019934
+7ffa6ff4607a442eb508661143530d5b,4666.24494099617,50.0,False,-145.8632685496317,19,456,1756399083,-113.63070519496996,1566858,1200,2025-08-28_18-38-03,{},246.27845287322998,24,19,4666.24494099617,"{'num_steps_sampled': 22800, 'num_steps_trained': 22800, 'default': {'policy_loss': -0.13419102132320404, 'vf_explained_var': 0.23938888311386108, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 996.186279296875, 'entropy': 18.23851776123047, 'kl': 0.01877405494451523, 'total_loss': 996.071044921875}, 'sample_time_ms': 249720.524, 'grad_time_ms': 694.96, 'load_time_ms': 1.56, 'update_time_ms': 2.58}",22800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},22800,cda-server-3,-92.51656606985235
+7ffa6ff4607a442eb508661143530d5b,4908.511640548706,50.0,False,-145.8632685496317,20,480,1756399325,-113.29456813555431,1566858,1200,2025-08-28_18-42-05,{},242.266699552536,24,20,4908.511640548706,"{'num_steps_sampled': 24000, 'num_steps_trained': 24000, 'default': {'policy_loss': -0.1307111382484436, 'vf_explained_var': 0.3056492805480957, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 885.0294189453125, 'entropy': 18.227909088134766, 'kl': 0.017692746594548225, 'total_loss': 884.9165649414062}, 'sample_time_ms': 253308.428, 'grad_time_ms': 694.623, 'load_time_ms': 1.537, 'update_time_ms': 2.601}",24000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},24000,cda-server-3,-92.51656606985235
+7ffa6ff4607a442eb508661143530d5b,5115.891381978989,50.0,False,-145.8632685496317,21,504,1756399532,-112.84483958739845,1566858,1200,2025-08-28_18-45-32,{},207.3797414302826,24,21,5115.891381978989,"{'num_steps_sampled': 25200, 'num_steps_trained': 25200, 'default': {'policy_loss': -0.14466862380504608, 'vf_explained_var': 0.31529197096824646, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 974.6930541992188, 'entropy': 18.17812156677246, 'kl': 0.017108624801039696, 'total_loss': 974.5657348632812}, 'sample_time_ms': 246943.138, 'grad_time_ms': 695.042, 'load_time_ms': 1.535, 'update_time_ms': 2.59}",25200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},25200,cda-server-3,-92.51656606985235
+7ffa6ff4607a442eb508661143530d5b,5416.202656984329,50.0,False,-145.31539173741282,22,528,1756399832,-110.93474544247985,1566858,1200,2025-08-28_18-50-32,{},300.3112750053406,24,22,5416.202656984329,"{'num_steps_sampled': 26400, 'num_steps_trained': 26400, 'default': {'policy_loss': -0.13921838998794556, 'vf_explained_var': 0.35455378890037537, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 688.4326171875, 'entropy': 18.171295166015625, 'kl': 0.016766492277383804, 'total_loss': 688.3103637695312}, 'sample_time_ms': 252007.87, 'grad_time_ms': 694.557, 'load_time_ms': 1.591, 'update_time_ms': 2.634}",26400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},26400,cda-server-3,-89.64457416011744
+7ffa6ff4607a442eb508661143530d5b,5694.230200052261,50.0,False,-144.2697605141167,23,552,1756400111,-110.89697706022662,1566858,1200,2025-08-28_18-55-11,{},278.02754306793213,24,23,5694.230200052261,"{'num_steps_sampled': 27600, 'num_steps_trained': 27600, 'default': {'policy_loss': -0.13763722777366638, 'vf_explained_var': 0.3888266980648041, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 731.5033569335938, 'entropy': 18.151676177978516, 'kl': 0.01764022745192051, 'total_loss': 731.3836059570312}, 'sample_time_ms': 250692.213, 'grad_time_ms': 695.901, 'load_time_ms': 1.648, 'update_time_ms': 2.636}",27600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},27600,cda-server-3,-88.93574451772085
+7ffa6ff4607a442eb508661143530d5b,5979.111471414566,50.0,False,-143.04836334373098,24,576,1756400395,-111.36213150222491,1566858,1200,2025-08-28_18-59-55,{},284.8812713623047,24,24,5979.111471414566,"{'num_steps_sampled': 28800, 'num_steps_trained': 28800, 'default': {'policy_loss': -0.1553221344947815, 'vf_explained_var': 0.3876085877418518, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 800.6702270507812, 'entropy': 18.09413719177246, 'kl': 0.018143318593502045, 'total_loss': 800.5332641601562}, 'sample_time_ms': 253971.919, 'grad_time_ms': 695.703, 'load_time_ms': 1.672, 'update_time_ms': 2.644}",28800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},28800,cda-server-3,-88.93574451772085
+7ffa6ff4607a442eb508661143530d5b,6190.038968324661,50.0,False,-142.52618813170668,25,600,1756400606,-110.11096078319713,1566858,1200,2025-08-28_19-03-26,{},210.92749691009521,24,25,6190.038968324661,"{'num_steps_sampled': 30000, 'num_steps_trained': 30000, 'default': {'policy_loss': -0.13792775571346283, 'vf_explained_var': 0.2672511339187622, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 757.8585205078125, 'entropy': 18.086666107177734, 'kl': 0.017636993899941444, 'total_loss': 757.7384643554688}, 'sample_time_ms': 249265.378, 'grad_time_ms': 695.932, 'load_time_ms': 1.705, 'update_time_ms': 2.651}",30000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},30000,cda-server-3,-87.96881449444385
+7ffa6ff4607a442eb508661143530d5b,6434.35960817337,50.0,False,-142.52618813170668,26,624,1756400851,-108.79258472972552,1566858,1200,2025-08-28_19-07-31,{},244.3206398487091,24,26,6434.35960817337,"{'num_steps_sampled': 31200, 'num_steps_trained': 31200, 'default': {'policy_loss': -0.13855737447738647, 'vf_explained_var': 0.3324964642524719, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 534.9935913085938, 'entropy': 18.05270004272461, 'kl': 0.015438605099916458, 'total_loss': 534.8707275390625}, 'sample_time_ms': 251456.213, 'grad_time_ms': 695.196, 'load_time_ms': 1.709, 'update_time_ms': 2.694}",31200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},31200,cda-server-3,-87.96881449444385
+7ffa6ff4607a442eb508661143530d5b,6672.771792173386,50.0,False,-141.16678514474953,27,648,1756401089,-106.84075375099816,1566858,1200,2025-08-28_19-11-29,{},238.41218400001526,24,27,6672.771792173386,"{'num_steps_sampled': 32400, 'num_steps_trained': 32400, 'default': {'policy_loss': -0.14445364475250244, 'vf_explained_var': 0.39279234409332275, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 483.4596862792969, 'entropy': 18.03901481628418, 'kl': 0.016610559076070786, 'total_loss': 483.33209228515625}, 'sample_time_ms': 251682.44, 'grad_time_ms': 695.081, 'load_time_ms': 1.711, 'update_time_ms': 2.652}",32400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},32400,cda-server-3,-87.96881449444385
+7ffa6ff4607a442eb508661143530d5b,6921.277290582657,50.0,False,-140.2992540424679,28,672,1756401338,-104.82396678370964,1566858,1200,2025-08-28_19-15-38,{},248.50549840927124,24,28,6921.277290582657,"{'num_steps_sampled': 33600, 'num_steps_trained': 33600, 'default': {'policy_loss': -0.1346297711133957, 'vf_explained_var': 0.4250890910625458, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 527.1061401367188, 'entropy': 18.05461883544922, 'kl': 0.016484878957271576, 'total_loss': 526.9881591796875}, 'sample_time_ms': 249426.827, 'grad_time_ms': 694.746, 'load_time_ms': 1.73, 'update_time_ms': 2.652}",33600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},33600,cda-server-3,-87.96881449444385
+7ffa6ff4607a442eb508661143530d5b,7166.122593641281,50.0,False,-143.76604889515352,29,696,1756401582,-103.82999386622753,1566858,1200,2025-08-28_19-19-42,{},244.84530305862427,24,29,7166.122593641281,"{'num_steps_sampled': 34800, 'num_steps_trained': 34800, 'default': {'policy_loss': -0.1438552737236023, 'vf_explained_var': 0.28951722383499146, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 612.649658203125, 'entropy': 18.002059936523438, 'kl': 0.016011489555239677, 'total_loss': 612.5220336914062}, 'sample_time_ms': 249283.478, 'grad_time_ms': 694.771, 'load_time_ms': 1.725, 'update_time_ms': 2.634}",34800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},34800,cda-server-3,-88.04797756183808
+7ffa6ff4607a442eb508661143530d5b,7463.127463102341,50.0,False,-143.76604889515352,30,720,1756401879,-103.3184289517542,1566858,1200,2025-08-28_19-24-39,{},297.00486946105957,24,30,7463.127463102341,"{'num_steps_sampled': 36000, 'num_steps_trained': 36000, 'default': {'policy_loss': -0.1339775025844574, 'vf_explained_var': 0.4751656949520111, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 410.6561279296875, 'entropy': 17.993558883666992, 'kl': 0.01672077737748623, 'total_loss': 410.5390625}, 'sample_time_ms': 254757.672, 'grad_time_ms': 694.401, 'load_time_ms': 1.736, 'update_time_ms': 2.615}",36000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},36000,cda-server-3,-88.04797756183808
+7ffa6ff4607a442eb508661143530d5b,7693.591760635376,50.0,False,-143.76604889515352,31,744,1756402110,-103.7415526760245,1566858,1200,2025-08-28_19-28-30,{},230.46429753303528,24,31,7693.591760635376,"{'num_steps_sampled': 37200, 'num_steps_trained': 37200, 'default': {'policy_loss': -0.13384594023227692, 'vf_explained_var': 0.3274219036102295, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 553.2936401367188, 'entropy': 17.990142822265625, 'kl': 0.017523042857646942, 'total_loss': 553.177490234375}, 'sample_time_ms': 257066.525, 'grad_time_ms': 693.991, 'load_time_ms': 1.726, 'update_time_ms': 2.629}",37200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},37200,cda-server-3,-83.96823218772687
+7ffa6ff4607a442eb508661143530d5b,7949.828924655914,50.0,False,-143.76604889515352,32,768,1756402366,-102.33990607931862,1566858,1200,2025-08-28_19-32-46,{},256.23716402053833,24,32,7949.828924655914,"{'num_steps_sampled': 38400, 'num_steps_trained': 38400, 'default': {'policy_loss': -0.13505858182907104, 'vf_explained_var': -0.02648478001356125, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 648.9166870117188, 'entropy': 17.949193954467773, 'kl': 0.015016328543424606, 'total_loss': 648.7968139648438}, 'sample_time_ms': 252659.187, 'grad_time_ms': 693.963, 'load_time_ms': 1.673, 'update_time_ms': 2.595}",38400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},38400,cda-server-3,-82.06553763454826
+7ffa6ff4607a442eb508661143530d5b,8189.958149909973,50.0,False,-141.17269706060515,33,792,1756402606,-103.5668895180602,1566858,1200,2025-08-28_19-36-46,{},240.12922525405884,24,33,8189.958149909973,"{'num_steps_sampled': 39600, 'num_steps_trained': 39600, 'default': {'policy_loss': -0.13893601298332214, 'vf_explained_var': 0.5408138036727905, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 424.24908447265625, 'entropy': 17.949119567871094, 'kl': 0.017221523448824883, 'total_loss': 424.1275634765625}, 'sample_time_ms': 248869.829, 'grad_time_ms': 693.6, 'load_time_ms': 1.623, 'update_time_ms': 2.574}",39600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},39600,cda-server-3,-82.06553763454826
+7ffa6ff4607a442eb508661143530d5b,8483.517776966095,50.0,False,-143.58513812624415,34,816,1756402900,-104.27082951918139,1566858,1200,2025-08-28_19-41-40,{},293.5596270561218,24,34,8483.517776966095,"{'num_steps_sampled': 40800, 'num_steps_trained': 40800, 'default': {'policy_loss': -0.1295945793390274, 'vf_explained_var': 0.5013567805290222, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 432.3161315917969, 'entropy': 17.887298583984375, 'kl': 0.01532017532736063, 'total_loss': 432.2020263671875}, 'sample_time_ms': 249737.248, 'grad_time_ms': 694.012, 'load_time_ms': 1.621, 'update_time_ms': 2.585}",40800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},40800,cda-server-3,-82.06553763454826
+7ffa6ff4607a442eb508661143530d5b,8723.819400072098,50.0,False,-148.21402368422488,35,840,1756403140,-103.90413005160178,1566858,1200,2025-08-28_19-45-40,{},240.3016231060028,24,35,8723.819400072098,"{'num_steps_sampled': 42000, 'num_steps_trained': 42000, 'default': {'policy_loss': -0.11818749457597733, 'vf_explained_var': 0.43253830075263977, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 439.0633239746094, 'entropy': 17.906269073486328, 'kl': 0.014970477670431137, 'total_loss': 438.9602966308594}, 'sample_time_ms': 252674.46, 'grad_time_ms': 694.241, 'load_time_ms': 1.623, 'update_time_ms': 2.579}",42000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},42000,cda-server-3,-82.06553763454826
+7ffa6ff4607a442eb508661143530d5b,8991.142573833466,50.0,False,-148.21402368422488,36,864,1756403408,-105.02438479051513,1566858,1200,2025-08-28_19-50-08,{},267.3231737613678,24,36,8991.142573833466,"{'num_steps_sampled': 43200, 'num_steps_trained': 43200, 'default': {'policy_loss': -0.13264299929141998, 'vf_explained_var': 0.5789927244186401, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 339.433837890625, 'entropy': 17.89673614501953, 'kl': 0.016630493104457855, 'total_loss': 339.31805419921875}, 'sample_time_ms': 254973.588, 'grad_time_ms': 695.398, 'load_time_ms': 1.612, 'update_time_ms': 2.552}",43200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},43200,cda-server-3,-86.89631256715614
+7ffa6ff4607a442eb508661143530d5b,9251.871697187424,50.0,False,-148.21402368422488,37,888,1756403668,-104.76125330698889,1566858,1200,2025-08-28_19-54-28,{},260.72912335395813,24,37,9251.871697187424,"{'num_steps_sampled': 44400, 'num_steps_trained': 44400, 'default': {'policy_loss': -0.13514705002307892, 'vf_explained_var': 0.5892637968063354, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 338.72479248046875, 'entropy': 17.81587028503418, 'kl': 0.017263438552618027, 'total_loss': 338.6070861816406}, 'sample_time_ms': 257205.886, 'grad_time_ms': 694.738, 'load_time_ms': 1.618, 'update_time_ms': 2.592}",44400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},44400,cda-server-3,-86.89631256715614
+7ffa6ff4607a442eb508661143530d5b,9515.561694860458,50.0,False,-148.21402368422488,38,912,1756403932,-102.16851522701262,1566858,1200,2025-08-28_19-58-52,{},263.68999767303467,24,38,9515.561694860458,"{'num_steps_sampled': 45600, 'num_steps_trained': 45600, 'default': {'policy_loss': -0.13061577081680298, 'vf_explained_var': 0.6929558515548706, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 212.41424560546875, 'entropy': 17.837133407592773, 'kl': 0.016778942197561264, 'total_loss': 212.30059814453125}, 'sample_time_ms': 258723.305, 'grad_time_ms': 695.81, 'load_time_ms': 1.603, 'update_time_ms': 2.598}",45600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},45600,cda-server-3,-85.2176874172706
+7ffa6ff4607a442eb508661143530d5b,9773.700018405914,50.0,False,-129.5024379654697,39,936,1756404190,-101.25596506657908,1566858,1200,2025-08-28_20-03-10,{},258.13832354545593,24,39,9773.700018405914,"{'num_steps_sampled': 46800, 'num_steps_trained': 46800, 'default': {'policy_loss': -0.15072497725486755, 'vf_explained_var': 0.7493559122085571, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 181.5271453857422, 'entropy': 17.8424129486084, 'kl': 0.016762135550379753, 'total_loss': 181.3933868408203}, 'sample_time_ms': 260052.985, 'grad_time_ms': 695.428, 'load_time_ms': 1.613, 'update_time_ms': 2.596}",46800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},46800,cda-server-3,-85.2176874172706
+7ffa6ff4607a442eb508661143530d5b,10019.42602467537,50.0,False,-134.83725819359995,40,960,1756404436,-100.88615860699981,1566858,1200,2025-08-28_20-07-16,{},245.72600626945496,24,40,10019.42602467537,"{'num_steps_sampled': 48000, 'num_steps_trained': 48000, 'default': {'policy_loss': -0.14604660868644714, 'vf_explained_var': 0.7916211485862732, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 139.4286346435547, 'entropy': 17.7719783782959, 'kl': 0.017864830791950226, 'total_loss': 139.30067443847656}, 'sample_time_ms': 254924.671, 'grad_time_ms': 695.871, 'load_time_ms': 1.6, 'update_time_ms': 2.606}",48000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},48000,cda-server-3,-85.2176874172706
+7ffa6ff4607a442eb508661143530d5b,10276.95909500122,50.0,False,-134.83725819359995,41,984,1756404693,-99.97430550424826,1566858,1200,2025-08-28_20-11-33,{},257.53307032585144,24,41,10276.95909500122,"{'num_steps_sampled': 49200, 'num_steps_trained': 49200, 'default': {'policy_loss': -0.1547583043575287, 'vf_explained_var': 0.7901754379272461, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 130.47581481933594, 'entropy': 17.793487548828125, 'kl': 0.016820203512907028, 'total_loss': 130.3380889892578}, 'sample_time_ms': 257630.172, 'grad_time_ms': 697.229, 'load_time_ms': 1.608, 'update_time_ms': 2.586}",49200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},49200,cda-server-3,-81.97190564989381
+7ffa6ff4607a442eb508661143530d5b,10532.508011579514,50.0,False,-134.83725819359995,42,1008,1756404949,-100.59323159474148,1566858,1200,2025-08-28_20-15-49,{},255.54891657829285,24,42,10532.508011579514,"{'num_steps_sampled': 50400, 'num_steps_trained': 50400, 'default': {'policy_loss': -0.1485620141029358, 'vf_explained_var': 0.8014824986457825, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 130.30760192871094, 'entropy': 17.76481819152832, 'kl': 0.016707023605704308, 'total_loss': 130.17596435546875}, 'sample_time_ms': 257561.944, 'grad_time_ms': 696.633, 'load_time_ms': 1.607, 'update_time_ms': 2.571}",50400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},50400,cda-server-3,-81.97190564989381
+7ffa6ff4607a442eb508661143530d5b,10790.558824539185,50.0,False,-144.4774719951156,43,1032,1756405207,-101.62419135575888,1566858,1200,2025-08-28_20-20-07,{},258.050812959671,24,43,10790.558824539185,"{'num_steps_sampled': 51600, 'num_steps_trained': 51600, 'default': {'policy_loss': -0.1260344386100769, 'vf_explained_var': 0.7056383490562439, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 193.17147827148438, 'entropy': 17.764888763427734, 'kl': 0.01669412851333618, 'total_loss': 193.0623321533203}, 'sample_time_ms': 259353.801, 'grad_time_ms': 696.863, 'load_time_ms': 1.658, 'update_time_ms': 2.593}",51600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},51600,cda-server-3,-81.97190564989381
+7ffa6ff4607a442eb508661143530d5b,11004.175188064575,50.0,False,-144.4774719951156,44,1056,1756405421,-101.60767664423524,1566858,1200,2025-08-28_20-23-41,{},213.61636352539062,24,44,11004.175188064575,"{'num_steps_sampled': 52800, 'num_steps_trained': 52800, 'default': {'policy_loss': -0.148858442902565, 'vf_explained_var': 0.7899549007415771, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 127.62850952148438, 'entropy': 17.72422981262207, 'kl': 0.017617570236325264, 'total_loss': 127.49748992919922}, 'sample_time_ms': 251359.431, 'grad_time_ms': 696.923, 'load_time_ms': 1.654, 'update_time_ms': 2.562}",52800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},52800,cda-server-3,-81.97190564989381
+7ffa6ff4607a442eb508661143530d5b,11276.314458370209,50.0,False,-144.4774719951156,45,1080,1756405693,-101.40711573503677,1566858,1200,2025-08-28_20-28-13,{},272.13927030563354,24,45,11276.314458370209,"{'num_steps_sampled': 54000, 'num_steps_trained': 54000, 'default': {'policy_loss': -0.13800571858882904, 'vf_explained_var': 0.8040595054626465, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 110.159912109375, 'entropy': 17.731359481811523, 'kl': 0.017722077667713165, 'total_loss': 110.03984832763672}, 'sample_time_ms': 254543.995, 'grad_time_ms': 696.108, 'load_time_ms': 1.667, 'update_time_ms': 2.57}",54000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},54000,cda-server-3,-80.96407251778136
+7ffa6ff4607a442eb508661143530d5b,11516.05266880989,50.0,False,-144.4774719951156,46,1104,1756405933,-101.97060669596017,1566858,1200,2025-08-28_20-32-13,{},239.738210439682,24,46,11516.05266880989,"{'num_steps_sampled': 55200, 'num_steps_trained': 55200, 'default': {'policy_loss': -0.15162310004234314, 'vf_explained_var': 0.8339415788650513, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 126.8819580078125, 'entropy': 17.67001724243164, 'kl': 0.018563542515039444, 'total_loss': 126.74913024902344}, 'sample_time_ms': 251785.723, 'grad_time_ms': 695.899, 'load_time_ms': 1.643, 'update_time_ms': 2.599}",55200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},55200,cda-server-3,-72.23740427864698
+7ffa6ff4607a442eb508661143530d5b,11774.868111371994,50.0,False,-135.64386258019744,47,1128,1756406191,-100.08848784529565,1566858,1200,2025-08-28_20-36-31,{},258.81544256210327,24,47,11774.868111371994,"{'num_steps_sampled': 56400, 'num_steps_trained': 56400, 'default': {'policy_loss': -0.14888200163841248, 'vf_explained_var': 0.8016077280044556, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 115.84388732910156, 'entropy': 17.718650817871094, 'kl': 0.017319880425930023, 'total_loss': 115.7125244140625}, 'sample_time_ms': 251593.678, 'grad_time_ms': 696.586, 'load_time_ms': 1.642, 'update_time_ms': 2.592}",56400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},56400,cda-server-3,-69.29839344771064
+7ffa6ff4607a442eb508661143530d5b,12001.616872549057,50.0,False,-135.64386258019744,48,1152,1756406418,-99.68269586736893,1566858,1200,2025-08-28_20-40-18,{},226.748761177063,24,48,12001.616872549057,"{'num_steps_sampled': 57600, 'num_steps_trained': 57600, 'default': {'policy_loss': -0.15360401570796967, 'vf_explained_var': 0.8236192464828491, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 103.33358001708984, 'entropy': 17.707372665405273, 'kl': 0.019352156668901443, 'total_loss': 103.19956970214844}, 'sample_time_ms': 247900.738, 'grad_time_ms': 695.412, 'load_time_ms': 1.626, 'update_time_ms': 2.588}",57600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},57600,cda-server-3,-69.29839344771064
+7ffa6ff4607a442eb508661143530d5b,12236.425989627838,50.0,False,-135.64386258019744,49,1176,1756406653,-98.22463176781638,1566858,1200,2025-08-28_20-44-13,{},234.80911707878113,24,49,12236.425989627838,"{'num_steps_sampled': 58800, 'num_steps_trained': 58800, 'default': {'policy_loss': -0.1393449306488037, 'vf_explained_var': 0.8465521335601807, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 76.96279907226562, 'entropy': 17.65727996826172, 'kl': 0.017094898968935013, 'total_loss': 76.84076690673828}, 'sample_time_ms': 245567.736, 'grad_time_ms': 695.671, 'load_time_ms': 1.564, 'update_time_ms': 2.572}",58800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},58800,cda-server-3,-69.29839344771064
+7ffa6ff4607a442eb508661143530d5b,12468.61930012703,50.0,False,-135.64386258019744,50,1200,1756406885,-96.12076030956199,1566858,1200,2025-08-28_20-48-05,{},232.19331049919128,24,50,12468.61930012703,"{'num_steps_sampled': 60000, 'num_steps_trained': 60000, 'default': {'policy_loss': -0.14331084489822388, 'vf_explained_var': 0.9255598783493042, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 37.3577995300293, 'entropy': 17.61494255065918, 'kl': 0.018930919468402863, 'total_loss': 37.2336540222168}, 'sample_time_ms': 244214.895, 'grad_time_ms': 695.213, 'load_time_ms': 1.565, 'update_time_ms': 2.591}",60000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},60000,cda-server-3,-69.29839344771064
+7ffa6ff4607a442eb508661143530d5b,12709.341829061508,50.0,False,-132.3752722797274,51,1224,1756407126,-95.85087433939978,1566858,1200,2025-08-28_20-52-06,{},240.72252893447876,24,51,12709.341829061508,"{'num_steps_sampled': 61200, 'num_steps_trained': 61200, 'default': {'policy_loss': -0.14188522100448608, 'vf_explained_var': 0.8768813610076904, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 71.55755615234375, 'entropy': 17.609588623046875, 'kl': 0.018651418387889862, 'total_loss': 71.43455505371094}, 'sample_time_ms': 242534.376, 'grad_time_ms': 694.668, 'load_time_ms': 1.564, 'update_time_ms': 2.595}",61200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},61200,cda-server-3,-74.95478802659025
+7ffa6ff4607a442eb508661143530d5b,12948.8257188797,50.0,False,-132.3752722797274,52,1248,1756407365,-94.87060266743552,1566858,1200,2025-08-28_20-56-05,{},239.48388981819153,24,52,12948.8257188797,"{'num_steps_sampled': 62400, 'num_steps_trained': 62400, 'default': {'policy_loss': -0.1256731152534485, 'vf_explained_var': 0.8760194182395935, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 63.51656723022461, 'entropy': 17.582059860229492, 'kl': 0.01717858947813511, 'total_loss': 63.40829086303711}, 'sample_time_ms': 240927.317, 'grad_time_ms': 695.266, 'load_time_ms': 1.531, 'update_time_ms': 2.61}",62400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},62400,cda-server-3,-74.95383250565217
+7ffa6ff4607a442eb508661143530d5b,13182.688966751099,50.0,False,-132.3752722797274,53,1272,1756407599,-95.34690342570403,1566858,1200,2025-08-28_20-59-59,{},233.86324787139893,24,53,13182.688966751099,"{'num_steps_sampled': 63600, 'num_steps_trained': 63600, 'default': {'policy_loss': -0.14219270646572113, 'vf_explained_var': 0.9002120494842529, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 50.38748550415039, 'entropy': 17.57541847229004, 'kl': 0.016468307003378868, 'total_loss': 50.261962890625}, 'sample_time_ms': 238508.049, 'grad_time_ms': 695.711, 'load_time_ms': 1.556, 'update_time_ms': 2.613}",63600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},63600,cda-server-3,-72.95456854464868
+7ffa6ff4607a442eb508661143530d5b,13417.420874357224,50.0,False,-132.3752722797274,54,1296,1756407834,-95.8701949185,1566858,1200,2025-08-28_21-03-54,{},234.73190760612488,24,54,13417.420874357224,"{'num_steps_sampled': 64800, 'num_steps_trained': 64800, 'default': {'policy_loss': -0.1341078281402588, 'vf_explained_var': 0.8943191170692444, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 55.06442642211914, 'entropy': 17.553176879882812, 'kl': 0.016393329948186874, 'total_loss': 54.94691467285156}, 'sample_time_ms': 240620.357, 'grad_time_ms': 694.996, 'load_time_ms': 1.535, 'update_time_ms': 2.629}",64800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},64800,cda-server-3,-72.95456854464868
+7ffa6ff4607a442eb508661143530d5b,13653.380255937576,50.0,False,-118.67192241336538,55,1320,1756408070,-95.79111107637159,1566858,1200,2025-08-28_21-07-50,{},235.95938158035278,24,55,13653.380255937576,"{'num_steps_sampled': 66000, 'num_steps_trained': 66000, 'default': {'policy_loss': -0.1451943963766098, 'vf_explained_var': 0.8970387578010559, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 59.29791259765625, 'entropy': 17.52239227294922, 'kl': 0.01855158805847168, 'total_loss': 59.17150115966797}, 'sample_time_ms': 237001.363, 'grad_time_ms': 696.064, 'load_time_ms': 1.505, 'update_time_ms': 2.594}",66000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},66000,cda-server-3,-72.95456854464868
+7ffa6ff4607a442eb508661143530d5b,13936.009518384933,50.0,False,-118.67192241336538,56,1344,1756408353,-95.66737848522412,1566858,1200,2025-08-28_21-12-33,{},282.6292624473572,24,56,13936.009518384933,"{'num_steps_sampled': 67200, 'num_steps_trained': 67200, 'default': {'policy_loss': -0.13796259462833405, 'vf_explained_var': 0.8547341227531433, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 80.74215698242188, 'entropy': 17.47957992553711, 'kl': 0.016449345275759697, 'total_loss': 80.62085723876953}, 'sample_time_ms': 241290.762, 'grad_time_ms': 695.682, 'load_time_ms': 1.535, 'update_time_ms': 2.581}",67200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},67200,cda-server-3,-72.95456854464868
+7ffa6ff4607a442eb508661143530d5b,14193.073428630829,50.0,False,-120.17837555190016,57,1368,1756408610,-96.01223746240512,1566858,1200,2025-08-28_21-16-50,{},257.0639102458954,24,57,14193.073428630829,"{'num_steps_sampled': 68400, 'num_steps_trained': 68400, 'default': {'policy_loss': -0.13664299249649048, 'vf_explained_var': 0.8982321619987488, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 57.72923278808594, 'entropy': 17.488954544067383, 'kl': 0.01833203062415123, 'total_loss': 57.61115264892578}, 'sample_time_ms': 241115.722, 'grad_time_ms': 695.653, 'load_time_ms': 1.486, 'update_time_ms': 2.574}",68400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},68400,cda-server-3,-77.63105009300338
+7ffa6ff4607a442eb508661143530d5b,14403.944508075714,50.0,False,-120.17837555190016,58,1392,1756408821,-95.81383784659482,1566858,1200,2025-08-28_21-20-21,{},210.87107944488525,24,58,14403.944508075714,"{'num_steps_sampled': 69600, 'num_steps_trained': 69600, 'default': {'policy_loss': -0.1360078603029251, 'vf_explained_var': 0.9107392430305481, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 46.64500045776367, 'entropy': 17.508567810058594, 'kl': 0.016890546306967735, 'total_loss': 46.52609634399414}, 'sample_time_ms': 239527.122, 'grad_time_ms': 696.5, 'load_time_ms': 1.493, 'update_time_ms': 2.548}",69600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},69600,cda-server-3,-77.63105009300338
+7ffa6ff4607a442eb508661143530d5b,14628.449810504913,50.0,False,-134.19846850030785,59,1416,1756409045,-96.00413438464108,1566858,1200,2025-08-28_21-24-05,{},224.50530242919922,24,59,14628.449810504913,"{'num_steps_sampled': 70800, 'num_steps_trained': 70800, 'default': {'policy_loss': -0.13526791334152222, 'vf_explained_var': 0.8764873743057251, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 70.6440658569336, 'entropy': 17.406675338745117, 'kl': 0.015590902417898178, 'total_loss': 70.52458190917969}, 'sample_time_ms': 238496.96, 'grad_time_ms': 696.199, 'load_time_ms': 1.53, 'update_time_ms': 2.565}",70800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},70800,cda-server-3,-68.80640733491872
+7ffa6ff4607a442eb508661143530d5b,14892.913598299026,50.0,False,-134.19846850030785,60,1440,1756409310,-95.28690626054942,1566858,1200,2025-08-28_21-28-30,{},264.46378779411316,24,60,14892.913598299026,"{'num_steps_sampled': 72000, 'num_steps_trained': 72000, 'default': {'policy_loss': -0.1450229287147522, 'vf_explained_var': 0.8648273944854736, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 73.43704223632812, 'entropy': 17.41325569152832, 'kl': 0.017410503700375557, 'total_loss': 73.30965423583984}, 'sample_time_ms': 241723.871, 'grad_time_ms': 696.371, 'load_time_ms': 1.535, 'update_time_ms': 2.538}",72000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},72000,cda-server-3,-65.12883998949023
+7ffa6ff4607a442eb508661143530d5b,15171.576050519943,50.0,False,-140.69425675750867,61,1464,1756409588,-95.57552015729631,1566858,1200,2025-08-28_21-33-08,{},278.66245222091675,24,61,15171.576050519943,"{'num_steps_sampled': 73200, 'num_steps_trained': 73200, 'default': {'policy_loss': -0.16056376695632935, 'vf_explained_var': 0.8698188066482544, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 78.02471923828125, 'entropy': 17.357572555541992, 'kl': 0.016973795369267464, 'total_loss': 77.88133239746094}, 'sample_time_ms': 245517.672, 'grad_time_ms': 696.497, 'load_time_ms': 1.534, 'update_time_ms': 2.557}",73200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},73200,cda-server-3,-65.12883998949023
+7ffa6ff4607a442eb508661143530d5b,15447.08240532875,50.0,False,-140.69425675750867,62,1488,1756409864,-93.79442351704975,1566858,1200,2025-08-28_21-37-44,{},275.5063548088074,24,62,15447.08240532875,"{'num_steps_sampled': 74400, 'num_steps_trained': 74400, 'default': {'policy_loss': -0.1545441895723343, 'vf_explained_var': 0.8350050449371338, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 69.55598449707031, 'entropy': 17.437522888183594, 'kl': 0.017163407057523727, 'total_loss': 69.41881561279297}, 'sample_time_ms': 249118.09, 'grad_time_ms': 698.277, 'load_time_ms': 1.554, 'update_time_ms': 2.574}",74400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},74400,cda-server-3,-65.12883998949023
+7ffa6ff4607a442eb508661143530d5b,15697.26745390892,50.0,False,-140.69425675750867,63,1512,1756410114,-92.36666563102112,1566858,1200,2025-08-28_21-41-54,{},250.18504858016968,24,63,15697.26745390892,"{'num_steps_sampled': 75600, 'num_steps_trained': 75600, 'default': {'policy_loss': -0.14007754623889923, 'vf_explained_var': 0.8591345548629761, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 73.84480285644531, 'entropy': 17.366647720336914, 'kl': 0.016919545829296112, 'total_loss': 73.72185516357422}, 'sample_time_ms': 250750.329, 'grad_time_ms': 698.257, 'load_time_ms': 1.524, 'update_time_ms': 2.579}",75600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},75600,cda-server-3,-62.945926316347276
+7ffa6ff4607a442eb508661143530d5b,15952.925563812256,50.0,False,-140.69425675750867,64,1536,1756410370,-91.95464920578206,1566858,1200,2025-08-28_21-46-10,{},255.65810990333557,24,64,15952.925563812256,"{'num_steps_sampled': 76800, 'num_steps_trained': 76800, 'default': {'policy_loss': -0.14222145080566406, 'vf_explained_var': 0.8870275616645813, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 54.59144592285156, 'entropy': 17.29293441772461, 'kl': 0.016513163223862648, 'total_loss': 54.4659423828125}, 'sample_time_ms': 252842.238, 'grad_time_ms': 698.963, 'load_time_ms': 1.542, 'update_time_ms': 2.566}",76800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},76800,cda-server-3,-62.945926316347276
+7ffa6ff4607a442eb508661143530d5b,16182.57912182808,50.0,False,-123.09070788121295,65,1560,1756410599,-89.69694503502396,1566858,1200,2025-08-28_21-49-59,{},229.65355801582336,24,65,16182.57912182808,"{'num_steps_sampled': 78000, 'num_steps_trained': 78000, 'default': {'policy_loss': -0.13650605082511902, 'vf_explained_var': 0.8334833979606628, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 79.48489379882812, 'entropy': 17.300722122192383, 'kl': 0.016983311623334885, 'total_loss': 79.36558532714844}, 'sample_time_ms': 252212.596, 'grad_time_ms': 698.018, 'load_time_ms': 1.542, 'update_time_ms': 2.56}",78000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},78000,cda-server-3,-62.945926316347276
+7ffa6ff4607a442eb508661143530d5b,16448.787168741226,50.0,False,-123.09070788121295,66,1584,1756410865,-88.2820038471582,1566858,1200,2025-08-28_21-54-25,{},266.208046913147,24,66,16448.787168741226,"{'num_steps_sampled': 79200, 'num_steps_trained': 79200, 'default': {'policy_loss': -0.13701409101486206, 'vf_explained_var': 0.8851307034492493, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 49.43205261230469, 'entropy': 17.186141967773438, 'kl': 0.01660430245101452, 'total_loss': 49.311851501464844}, 'sample_time_ms': 250570.621, 'grad_time_ms': 697.943, 'load_time_ms': 1.536, 'update_time_ms': 2.553}",79200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},79200,cda-server-3,-62.945926316347276
+7ffa6ff4607a442eb508661143530d5b,16695.300344944,50.0,False,-112.2154760288806,67,1608,1756411112,-87.30647296079995,1566858,1200,2025-08-28_21-58-32,{},246.51317620277405,24,67,16695.300344944,"{'num_steps_sampled': 80400, 'num_steps_trained': 80400, 'default': {'policy_loss': -0.14472953975200653, 'vf_explained_var': 0.8869979381561279, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 50.39473342895508, 'entropy': 17.204143524169922, 'kl': 0.01810036227107048, 'total_loss': 50.268333435058594}, 'sample_time_ms': 249516.096, 'grad_time_ms': 697.373, 'load_time_ms': 1.53, 'update_time_ms': 2.542}",80400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},80400,cda-server-3,-60.97078129308109
+7ffa6ff4607a442eb508661143530d5b,16937.570281505585,50.0,False,-104.9671725722534,68,1632,1756411354,-85.12118934184193,1566858,1200,2025-08-28_22-02-34,{},242.26993656158447,24,68,16937.570281505585,"{'num_steps_sampled': 81600, 'num_steps_trained': 81600, 'default': {'policy_loss': -0.1376802623271942, 'vf_explained_var': 0.8597739338874817, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 57.41520309448242, 'entropy': 17.178375244140625, 'kl': 0.0169665589928627, 'total_loss': 57.29470443725586}, 'sample_time_ms': 252656.029, 'grad_time_ms': 697.251, 'load_time_ms': 1.562, 'update_time_ms': 2.569}",81600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},81600,cda-server-3,-60.95099421013692
+7ffa6ff4607a442eb508661143530d5b,17204.35671567917,50.0,False,-108.20417373274827,69,1656,1756411621,-84.75867703744163,1566858,1200,2025-08-28_22-07-01,{},266.786434173584,24,69,17204.35671567917,"{'num_steps_sampled': 82800, 'num_steps_trained': 82800, 'default': {'policy_loss': -0.13954412937164307, 'vf_explained_var': 0.8834936618804932, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 52.70014190673828, 'entropy': 17.090085983276367, 'kl': 0.01663246750831604, 'total_loss': 52.57743835449219}, 'sample_time_ms': 256884.429, 'grad_time_ms': 697.089, 'load_time_ms': 1.488, 'update_time_ms': 2.548}",82800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},82800,cda-server-3,-60.95099421013692
+7ffa6ff4607a442eb508661143530d5b,17439.835283517838,50.0,False,-108.20417373274827,70,1680,1756411857,-84.66569654180248,1566858,1200,2025-08-28_22-10-57,{},235.47856783866882,24,70,17439.835283517838,"{'num_steps_sampled': 84000, 'num_steps_trained': 84000, 'default': {'policy_loss': -0.13918136060237885, 'vf_explained_var': 0.8703316450119019, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 57.281005859375, 'entropy': 17.030242919921875, 'kl': 0.015691058710217476, 'total_loss': 57.15771484375}, 'sample_time_ms': 253984.995, 'grad_time_ms': 697.978, 'load_time_ms': 1.486, 'update_time_ms': 2.563}",84000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},84000,cda-server-3,-60.95099421013692
+7ffa6ff4607a442eb508661143530d5b,17697.609385490417,50.0,False,-131.91481230341097,71,1704,1756412114,-84.41687713566581,1566858,1200,2025-08-28_22-15-14,{},257.77410197257996,24,71,17697.609385490417,"{'num_steps_sampled': 85200, 'num_steps_trained': 85200, 'default': {'policy_loss': -0.14263315498828888, 'vf_explained_var': 0.8583628535270691, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 61.98405075073242, 'entropy': 17.083913803100586, 'kl': 0.017013147473335266, 'total_loss': 61.85863494873047}, 'sample_time_ms': 251896.396, 'grad_time_ms': 697.808, 'load_time_ms': 1.491, 'update_time_ms': 2.553}",85200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},85200,cda-server-3,-60.95099421013692
+7ffa6ff4607a442eb508661143530d5b,17942.44306564331,50.0,False,-131.91481230341097,72,1728,1756412359,-83.9152839901135,1566858,1200,2025-08-28_22-19-19,{},244.83368015289307,24,72,17942.44306564331,"{'num_steps_sampled': 86400, 'num_steps_trained': 86400, 'default': {'policy_loss': -0.1403069943189621, 'vf_explained_var': 0.8636730909347534, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 56.658843994140625, 'entropy': 17.043167114257812, 'kl': 0.01649215817451477, 'total_loss': 56.53523635864258}, 'sample_time_ms': 248830.982, 'grad_time_ms': 696.081, 'load_time_ms': 1.428, 'update_time_ms': 2.547}",86400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},86400,cda-server-3,-61.90480025645444
+7ffa6ff4607a442eb508661143530d5b,18184.41885781288,50.0,False,-131.91481230341097,73,1752,1756412601,-83.86643944815816,1566858,1200,2025-08-28_22-23-21,{},241.97579216957092,24,73,18184.41885781288,"{'num_steps_sampled': 87600, 'num_steps_trained': 87600, 'default': {'policy_loss': -0.14834047853946686, 'vf_explained_var': 0.8909367322921753, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 44.6313362121582, 'entropy': 16.992233276367188, 'kl': 0.017693255096673965, 'total_loss': 44.50090789794922}, 'sample_time_ms': 248009.755, 'grad_time_ms': 696.471, 'load_time_ms': 1.402, 'update_time_ms': 2.523}",87600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},87600,cda-server-3,-60.02371123132278
+7ffa6ff4607a442eb508661143530d5b,18448.48611831665,50.0,False,-131.91481230341097,74,1776,1756412865,-83.91763020685005,1566858,1200,2025-08-28_22-27-45,{},264.0672605037689,24,74,18448.48611831665,"{'num_steps_sampled': 88800, 'num_steps_trained': 88800, 'default': {'policy_loss': -0.13635270297527313, 'vf_explained_var': 0.8877306580543518, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 50.658626556396484, 'entropy': 16.9888916015625, 'kl': 0.01725666970014572, 'total_loss': 50.53974533081055}, 'sample_time_ms': 248850.608, 'grad_time_ms': 696.485, 'load_time_ms': 1.416, 'update_time_ms': 2.517}",88800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},88800,cda-server-3,-59.006022251229936
+7ffa6ff4607a442eb508661143530d5b,18711.91195678711,50.0,False,-114.60479451289457,75,1800,1756413129,-83.25586565324383,1566858,1200,2025-08-28_22-32-09,{},263.425838470459,24,75,18711.91195678711,"{'num_steps_sampled': 90000, 'num_steps_trained': 90000, 'default': {'policy_loss': -0.13750618696212769, 'vf_explained_var': 0.8878066539764404, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 45.646209716796875, 'entropy': 16.916126251220703, 'kl': 0.01593046449124813, 'total_loss': 45.52482986450195}, 'sample_time_ms': 252226.95, 'grad_time_ms': 697.303, 'load_time_ms': 1.438, 'update_time_ms': 2.541}",90000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},90000,cda-server-3,-54.96061487194269
+7ffa6ff4607a442eb508661143530d5b,18935.775758504868,50.0,False,-114.60479451289457,76,1824,1756413353,-82.72014624301787,1566858,1200,2025-08-28_22-35-53,{},223.86380171775818,24,76,18935.775758504868,"{'num_steps_sampled': 91200, 'num_steps_trained': 91200, 'default': {'policy_loss': -0.1411411315202713, 'vf_explained_var': 0.90963214635849, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 31.231788635253906, 'entropy': 16.93149185180664, 'kl': 0.017530765384435654, 'total_loss': 31.108394622802734}, 'sample_time_ms': 247992.191, 'grad_time_ms': 697.658, 'load_time_ms': 1.458, 'update_time_ms': 2.527}",91200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},91200,cda-server-3,-54.96061487194269
+7ffa6ff4607a442eb508661143530d5b,19178.031841754913,50.0,False,-108.94272480428417,77,1848,1756413595,-80.92310797396698,1566858,1200,2025-08-28_22-39-55,{},242.25608325004578,24,77,19178.031841754913,"{'num_steps_sampled': 92400, 'num_steps_trained': 92400, 'default': {'policy_loss': -0.14614935219287872, 'vf_explained_var': 0.8987939357757568, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 35.67803955078125, 'entropy': 16.90253448486328, 'kl': 0.01667719893157482, 'total_loss': 35.54877471923828}, 'sample_time_ms': 247566.57, 'grad_time_ms': 697.475, 'load_time_ms': 1.53, 'update_time_ms': 2.536}",92400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},92400,cda-server-3,-54.96061487194269
+7ffa6ff4607a442eb508661143530d5b,19408.83300757408,50.0,False,-112.77217478784608,78,1872,1756413826,-78.66633419654116,1566858,1200,2025-08-28_22-43-46,{},230.8011658191681,24,78,19408.83300757408,"{'num_steps_sampled': 93600, 'num_steps_trained': 93600, 'default': {'policy_loss': -0.14167816936969757, 'vf_explained_var': 0.8663337826728821, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 49.84602355957031, 'entropy': 16.82881736755371, 'kl': 0.016407020390033722, 'total_loss': 49.72095489501953}, 'sample_time_ms': 246420.316, 'grad_time_ms': 696.87, 'load_time_ms': 1.52, 'update_time_ms': 2.508}",93600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},93600,cda-server-3,-54.96061487194269
+7ffa6ff4607a442eb508661143530d5b,19628.77525162697,50.0,False,-112.77217478784608,79,1896,1756414046,-76.82071840459376,1566858,1200,2025-08-28_22-47-26,{},219.94224405288696,24,79,19628.77525162697,"{'num_steps_sampled': 94800, 'num_steps_trained': 94800, 'default': {'policy_loss': -0.13872185349464417, 'vf_explained_var': 0.8788679838180542, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 42.41904067993164, 'entropy': 16.846330642700195, 'kl': 0.017035197466611862, 'total_loss': 42.29756164550781}, 'sample_time_ms': 241734.828, 'grad_time_ms': 697.758, 'load_time_ms': 1.631, 'update_time_ms': 2.512}",94800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},94800,cda-server-3,-55.940889508221765
+7ffa6ff4607a442eb508661143530d5b,19877.09362578392,50.0,False,-112.77217478784608,80,1920,1756414294,-75.66733800064131,1566858,1200,2025-08-28_22-51-34,{},248.3183741569519,24,80,19877.09362578392,"{'num_steps_sampled': 96000, 'num_steps_trained': 96000, 'default': {'policy_loss': -0.146415576338768, 'vf_explained_var': 0.8998842239379883, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 35.70315933227539, 'entropy': 16.746917724609375, 'kl': 0.01694786176085472, 'total_loss': 35.57390213012695}, 'sample_time_ms': 243019.57, 'grad_time_ms': 696.957, 'load_time_ms': 1.641, 'update_time_ms': 2.523}",96000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},96000,cda-server-3,-55.940889508221765
+7ffa6ff4607a442eb508661143530d5b,20132.509190797806,50.0,False,-112.77217478784608,81,1944,1756414549,-74.45318096784645,1566858,1200,2025-08-28_22-55-49,{},255.4155650138855,24,81,20132.509190797806,"{'num_steps_sampled': 97200, 'num_steps_trained': 97200, 'default': {'policy_loss': -0.1437041163444519, 'vf_explained_var': 0.8701409697532654, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 36.276241302490234, 'entropy': 16.741798400878906, 'kl': 0.015728479251265526, 'total_loss': 36.14846420288086}, 'sample_time_ms': 242784.541, 'grad_time_ms': 696.233, 'load_time_ms': 1.548, 'update_time_ms': 2.548}",97200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},97200,cda-server-3,-55.940889508221765
+7ffa6ff4607a442eb508661143530d5b,20363.48011994362,50.0,False,-112.77217478784608,82,1968,1756414780,-72.77709877049519,1566858,1200,2025-08-28_22-59-40,{},230.970929145813,24,82,20363.48011994362,"{'num_steps_sampled': 98400, 'num_steps_trained': 98400, 'default': {'policy_loss': -0.14084871113300323, 'vf_explained_var': 0.8655793070793152, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 34.792381286621094, 'entropy': 16.7445068359375, 'kl': 0.01601782813668251, 'total_loss': 34.66774368286133}, 'sample_time_ms': 241397.784, 'grad_time_ms': 696.74, 'load_time_ms': 1.538, 'update_time_ms': 2.54}",98400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},98400,cda-server-3,-53.95587853910099
+7ffa6ff4607a442eb508661143530d5b,20580.654803276062,50.0,False,-107.4522891873826,83,1992,1756414997,-70.81955430992147,1566858,1200,2025-08-28_23-03-17,{},217.17468333244324,24,83,20580.654803276062,"{'num_steps_sampled': 99600, 'num_steps_trained': 99600, 'default': {'policy_loss': -0.13204234838485718, 'vf_explained_var': 0.8760443925857544, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 31.32162094116211, 'entropy': 16.793642044067383, 'kl': 0.01535502914339304, 'total_loss': 31.20512580871582}, 'sample_time_ms': 238918.834, 'grad_time_ms': 695.57, 'load_time_ms': 1.534, 'update_time_ms': 2.518}",99600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},99600,cda-server-3,-53.95587853910099
+7ffa6ff4607a442eb508661143530d5b,20815.180485486984,50.0,False,-107.4522891873826,84,2016,1756415232,-68.76959735542866,1566858,1200,2025-08-28_23-07-12,{},234.52568221092224,24,84,20815.180485486984,"{'num_steps_sampled': 100800, 'num_steps_trained': 100800, 'default': {'policy_loss': -0.14433318376541138, 'vf_explained_var': 0.8722853064537048, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 29.641008377075195, 'entropy': 16.78034210205078, 'kl': 0.01663334108889103, 'total_loss': 29.51351547241211}, 'sample_time_ms': 235965.602, 'grad_time_ms': 694.781, 'load_time_ms': 1.439, 'update_time_ms': 2.549}",100800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},100800,cda-server-3,-53.95587853910099
+7ffa6ff4607a442eb508661143530d5b,21029.106865644455,50.0,False,-107.98561338414216,85,2040,1756415446,-69.05470528023939,1566858,1200,2025-08-28_23-10-46,{},213.9263801574707,24,85,21029.106865644455,"{'num_steps_sampled': 102000, 'num_steps_trained': 102000, 'default': {'policy_loss': -0.14800840616226196, 'vf_explained_var': 0.8737674355506897, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 44.28384780883789, 'entropy': 16.667724609375, 'kl': 0.017794229090213776, 'total_loss': 44.15385818481445}, 'sample_time_ms': 231016.692, 'grad_time_ms': 693.88, 'load_time_ms': 1.364, 'update_time_ms': 2.556}",102000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},102000,cda-server-3,-53.95587853910099
+7ffa6ff4607a442eb508661143530d5b,21267.91470694542,50.0,False,-107.98561338414216,86,2064,1756415685,-69.5951626072507,1566858,1200,2025-08-28_23-14-45,{},238.80784130096436,24,86,21267.91470694542,"{'num_steps_sampled': 103200, 'num_steps_trained': 103200, 'default': {'policy_loss': -0.1315995305776596, 'vf_explained_var': 0.8468186855316162, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 42.02323913574219, 'entropy': 16.615787506103516, 'kl': 0.016590215265750885, 'total_loss': 41.90843963623047}, 'sample_time_ms': 232511.672, 'grad_time_ms': 693.298, 'load_time_ms': 1.332, 'update_time_ms': 2.554}",103200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},103200,cda-server-3,-54.95182090997833
+7ffa6ff4607a442eb508661143530d5b,21523.015555143356,50.0,False,-107.98561338414216,87,2088,1756415940,-68.46944199107841,1566858,1200,2025-08-28_23-19-00,{},255.100848197937,24,87,21523.015555143356,"{'num_steps_sampled': 104400, 'num_steps_trained': 104400, 'default': {'policy_loss': -0.13484853506088257, 'vf_explained_var': 0.8937379121780396, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 24.742692947387695, 'entropy': 16.481712341308594, 'kl': 0.01563051901757717, 'total_loss': 24.62367057800293}, 'sample_time_ms': 233795.722, 'grad_time_ms': 693.749, 'load_time_ms': 1.314, 'update_time_ms': 2.571}",104400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},104400,cda-server-3,-54.95182090997833
+7ffa6ff4607a442eb508661143530d5b,21755.724896669388,50.0,False,-107.98561338414216,88,2112,1756416173,-68.90574880241981,1566858,1200,2025-08-28_23-22-53,{},232.7093415260315,24,88,21755.724896669388,"{'num_steps_sampled': 105600, 'num_steps_trained': 105600, 'default': {'policy_loss': -0.14801417291164398, 'vf_explained_var': 0.863982617855072, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 38.028564453125, 'entropy': 16.538761138916016, 'kl': 0.01715698093175888, 'total_loss': 37.89792251586914}, 'sample_time_ms': 233985.216, 'grad_time_ms': 695.112, 'load_time_ms': 1.293, 'update_time_ms': 2.564}",105600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},105600,cda-server-3,-53.2230760042775
+7ffa6ff4607a442eb508661143530d5b,21995.29202914238,50.0,False,-115.55456980047862,89,2136,1756416412,-68.03962091148274,1566858,1200,2025-08-28_23-26-52,{},239.56713247299194,24,89,21995.29202914238,"{'num_steps_sampled': 106800, 'num_steps_trained': 106800, 'default': {'policy_loss': -0.13819807767868042, 'vf_explained_var': 0.860944390296936, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 40.49483108520508, 'entropy': 16.507343292236328, 'kl': 0.015339210629463196, 'total_loss': 40.37216567993164}, 'sample_time_ms': 235947.864, 'grad_time_ms': 694.981, 'load_time_ms': 1.258, 'update_time_ms': 2.587}",106800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},106800,cda-server-3,-53.2230760042775
+7ffa6ff4607a442eb508661143530d5b,22200.77853822708,50.0,False,-115.55456980047862,90,2160,1756416618,-66.43045601954503,1566858,1200,2025-08-28_23-30-18,{},205.48650908470154,24,90,22200.77853822708,"{'num_steps_sampled': 108000, 'num_steps_trained': 108000, 'default': {'policy_loss': -0.1232977956533432, 'vf_explained_var': 0.8503206968307495, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 30.72481918334961, 'entropy': 16.54416275024414, 'kl': 0.015979347750544548, 'total_loss': 30.61770248413086}, 'sample_time_ms': 231664.322, 'grad_time_ms': 695.498, 'load_time_ms': 1.195, 'update_time_ms': 2.564}",108000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},108000,cda-server-3,-53.2230760042775
+7ffa6ff4607a442eb508661143530d5b,22468.302712917328,50.0,False,-115.55456980047862,91,2184,1756416885,-67.06299508675095,1566858,1200,2025-08-28_23-34-45,{},267.5241746902466,24,91,22468.302712917328,"{'num_steps_sampled': 109200, 'num_steps_trained': 109200, 'default': {'policy_loss': -0.1417466253042221, 'vf_explained_var': 0.8252907991409302, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 51.491268157958984, 'entropy': 16.51049041748047, 'kl': 0.0179302878677845, 'total_loss': 51.367679595947266}, 'sample_time_ms': 232874.007, 'grad_time_ms': 696.569, 'load_time_ms': 1.295, 'update_time_ms': 2.534}",109200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},109200,cda-server-3,-53.2230760042775
+7ffa6ff4607a442eb508661143530d5b,22723.705909967422,50.0,False,-115.55456980047862,92,2208,1756417141,-65.61862931952739,1566858,1200,2025-08-28_23-39-01,{},255.4031970500946,24,92,22723.705909967422,"{'num_steps_sampled': 110400, 'num_steps_trained': 110400, 'default': {'policy_loss': -0.13279880583286285, 'vf_explained_var': 0.9043550491333008, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 18.820959091186523, 'entropy': 16.467430114746094, 'kl': 0.016397977247834206, 'total_loss': 18.70476531982422}, 'sample_time_ms': 235317.525, 'grad_time_ms': 696.121, 'load_time_ms': 1.383, 'update_time_ms': 2.551}",110400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},110400,cda-server-3,-53.2230760042775
+7ffa6ff4607a442eb508661143530d5b,22970.57584619522,50.0,False,-115.55456980047862,93,2232,1756417387,-64.91044855525129,1566858,1200,2025-08-28_23-43-07,{},246.86993622779846,24,93,22970.57584619522,"{'num_steps_sampled': 111600, 'num_steps_trained': 111600, 'default': {'policy_loss': -0.1461043804883957, 'vf_explained_var': 0.8137485980987549, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 40.37269973754883, 'entropy': 16.43319320678711, 'kl': 0.017595432698726654, 'total_loss': 40.24441146850586}, 'sample_time_ms': 238287.126, 'grad_time_ms': 695.95, 'load_time_ms': 1.431, 'update_time_ms': 2.577}",111600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},111600,cda-server-3,-53.897588277465395
+7ffa6ff4607a442eb508661143530d5b,23212.88718509674,50.0,False,-98.42147392309447,94,2256,1756417630,-64.81923247327849,1566858,1200,2025-08-28_23-47-10,{},242.31133890151978,24,94,23212.88718509674,"{'num_steps_sampled': 112800, 'num_steps_trained': 112800, 'default': {'policy_loss': -0.14040905237197876, 'vf_explained_var': 0.8580853343009949, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 31.303335189819336, 'entropy': 16.410274505615234, 'kl': 0.015972889959812164, 'total_loss': 31.17909812927246}, 'sample_time_ms': 239064.399, 'grad_time_ms': 697.275, 'load_time_ms': 1.437, 'update_time_ms': 2.554}",112800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},112800,cda-server-3,-54.8650017855454
+7ffa6ff4607a442eb508661143530d5b,23426.63425207138,50.0,False,-98.42147392309447,95,2280,1756417844,-63.39130856250731,1566858,1200,2025-08-28_23-50-44,{},213.7470669746399,24,95,23426.63425207138,"{'num_steps_sampled': 114000, 'num_steps_trained': 114000, 'default': {'policy_loss': -0.12778830528259277, 'vf_explained_var': 0.8555526733398438, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 26.613889694213867, 'entropy': 16.41890525817871, 'kl': 0.01759319193661213, 'total_loss': 26.503915786743164}, 'sample_time_ms': 239045.459, 'grad_time_ms': 698.177, 'load_time_ms': 1.501, 'update_time_ms': 2.552}",114000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},114000,cda-server-3,-54.074040013498
+7ffa6ff4607a442eb508661143530d5b,23677.734798192978,50.0,False,-101.42589945490688,96,2304,1756418095,-63.60230280510545,1566858,1200,2025-08-28_23-54-55,{},251.1005461215973,24,96,23677.734798192978,"{'num_steps_sampled': 115200, 'num_steps_trained': 115200, 'default': {'policy_loss': -0.13580124080181122, 'vf_explained_var': 0.8498879075050354, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 35.0389404296875, 'entropy': 16.359601974487305, 'kl': 0.015551825053989887, 'total_loss': 34.918888092041016}, 'sample_time_ms': 240275.117, 'grad_time_ms': 697.774, 'load_time_ms': 1.513, 'update_time_ms': 2.554}",115200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},115200,cda-server-3,-52.858943297092495
+7ffa6ff4607a442eb508661143530d5b,23935.555701732635,50.0,False,-101.42589945490688,97,2328,1756418353,-64.01519855934126,1566858,1200,2025-08-28_23-59-13,{},257.8209035396576,24,97,23935.555701732635,"{'num_steps_sampled': 116400, 'num_steps_trained': 116400, 'default': {'policy_loss': -0.14195483922958374, 'vf_explained_var': 0.8827171921730042, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 26.063066482543945, 'entropy': 16.319156646728516, 'kl': 0.015602422878146172, 'total_loss': 25.936906814575195}, 'sample_time_ms': 240547.419, 'grad_time_ms': 697.496, 'load_time_ms': 1.509, 'update_time_ms': 2.535}",116400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},116400,cda-server-3,-52.858943297092495
+7ffa6ff4607a442eb508661143530d5b,24136.122399806976,50.0,False,-101.42589945490688,98,2352,1756418553,-63.12411026398803,1566858,1200,2025-08-29_00-02-33,{},200.56669807434082,24,98,24136.122399806976,"{'num_steps_sampled': 117600, 'num_steps_trained': 117600, 'default': {'policy_loss': -0.12811775505542755, 'vf_explained_var': 0.8606259822845459, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 26.62224578857422, 'entropy': 16.33074188232422, 'kl': 0.01618027687072754, 'total_loss': 26.510509490966797}, 'sample_time_ms': 237334.149, 'grad_time_ms': 696.568, 'load_time_ms': 1.489, 'update_time_ms': 2.533}",117600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},117600,cda-server-3,-52.858943297092495
+7ffa6ff4607a442eb508661143530d5b,24407.357256412506,50.0,False,-101.42589945490688,99,2376,1756418824,-62.44087108395161,1566858,1200,2025-08-29_00-07-04,{},271.2348566055298,24,99,24407.357256412506,"{'num_steps_sampled': 118800, 'num_steps_trained': 118800, 'default': {'policy_loss': -0.1403992772102356, 'vf_explained_var': 0.8786462545394897, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 19.873876571655273, 'entropy': 16.25355339050293, 'kl': 0.0183703675866127, 'total_loss': 19.752073287963867}, 'sample_time_ms': 240501.394, 'grad_time_ms': 696.21, 'load_time_ms': 1.433, 'update_time_ms': 2.511}",118800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},118800,cda-server-3,-52.858943297092495
+7ffa6ff4607a442eb508661143530d5b,24657.685720443726,50.0,False,-89.90023007400104,100,2400,1756419075,-61.653440944418,1566858,1200,2025-08-29_00-11-15,{},250.32846403121948,24,100,24657.685720443726,"{'num_steps_sampled': 120000, 'num_steps_trained': 120000, 'default': {'policy_loss': -0.1290886104106903, 'vf_explained_var': 0.8327670097351074, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 31.24372100830078, 'entropy': 16.272443771362305, 'kl': 0.015662631019949913, 'total_loss': 31.1304931640625}, 'sample_time_ms': 244985.593, 'grad_time_ms': 696.091, 'load_time_ms': 1.492, 'update_time_ms': 2.499}",120000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},120000,cda-server-3,-52.93601767317048
+7ffa6ff4607a442eb508661143530d5b,24920.84255218506,50.0,False,-89.90023007400104,101,2424,1756419338,-61.131917472065616,1566858,1200,2025-08-29_00-15-38,{},263.156831741333,24,101,24920.84255218506,"{'num_steps_sampled': 121200, 'num_steps_trained': 121200, 'default': {'policy_loss': -0.11977836489677429, 'vf_explained_var': 0.8679201006889343, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 26.77100372314453, 'entropy': 16.270166397094727, 'kl': 0.015529219061136246, 'total_loss': 26.66695213317871}, 'sample_time_ms': 244548.732, 'grad_time_ms': 696.194, 'load_time_ms': 1.493, 'update_time_ms': 2.508}",121200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},121200,cda-server-3,-52.93601767317048
+7ffa6ff4607a442eb508661143530d5b,25163.190752744675,50.0,False,-81.1903957303375,102,2448,1756419580,-60.72351474107361,1566858,1200,2025-08-29_00-19-40,{},242.3482005596161,24,102,25163.190752744675,"{'num_steps_sampled': 122400, 'num_steps_trained': 122400, 'default': {'policy_loss': -0.12940487265586853, 'vf_explained_var': 0.882462739944458, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 23.390724182128906, 'entropy': 16.316390991210938, 'kl': 0.015969369560480118, 'total_loss': 23.277488708496094}, 'sample_time_ms': 243243.22, 'grad_time_ms': 696.232, 'load_time_ms': 1.496, 'update_time_ms': 2.468}",122400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},122400,cda-server-3,-52.93601767317048
+7ffa6ff4607a442eb508661143530d5b,25435.75412583351,50.0,False,-81.1903957303375,103,2472,1756419853,-60.79222265253318,1566858,1200,2025-08-29_00-24-13,{},272.56337308883667,24,103,25435.75412583351,"{'num_steps_sampled': 123600, 'num_steps_trained': 123600, 'default': {'policy_loss': -0.13579684495925903, 'vf_explained_var': 0.8778722882270813, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 22.02468490600586, 'entropy': 16.328903198242188, 'kl': 0.016616467386484146, 'total_loss': 21.90571403503418}, 'sample_time_ms': 245811.227, 'grad_time_ms': 697.59, 'load_time_ms': 1.492, 'update_time_ms': 2.459}",123600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},123600,cda-server-3,-52.93601767317048
+7ffa6ff4607a442eb508661143530d5b,25665.85821557045,50.0,False,-81.1903957303375,104,2496,1756420083,-60.116771525483344,1566858,1200,2025-08-29_00-28-03,{},230.10408973693848,24,104,25665.85821557045,"{'num_steps_sampled': 124800, 'num_steps_trained': 124800, 'default': {'policy_loss': -0.15344049036502838, 'vf_explained_var': 0.8817589282989502, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 18.401994705200195, 'entropy': 16.331209182739258, 'kl': 0.016265608370304108, 'total_loss': 18.26502227783203}, 'sample_time_ms': 244590.369, 'grad_time_ms': 697.667, 'load_time_ms': 1.531, 'update_time_ms': 2.454}",124800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},124800,cda-server-3,-52.95849628922025
+7ffa6ff4607a442eb508661143530d5b,25892.035324811935,50.0,False,-82.16212772395187,105,2520,1756420309,-60.16023217998311,1566858,1200,2025-08-29_00-31-49,{},226.1771092414856,24,105,25892.035324811935,"{'num_steps_sampled': 126000, 'num_steps_trained': 126000, 'default': {'policy_loss': -0.12750448286533356, 'vf_explained_var': 0.8136303424835205, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 38.1964111328125, 'entropy': 16.27743148803711, 'kl': 0.016278643161058426, 'total_loss': 38.08538818359375}, 'sample_time_ms': 245834.201, 'grad_time_ms': 697.003, 'load_time_ms': 1.436, 'update_time_ms': 2.444}",126000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},126000,cda-server-3,-52.93952025325732
+7ffa6ff4607a442eb508661143530d5b,26110.90698647499,50.0,False,-82.16212772395187,106,2544,1756420528,-59.81542332779563,1566858,1200,2025-08-29_00-35-28,{},218.87166166305542,24,106,26110.90698647499,"{'num_steps_sampled': 127200, 'num_steps_trained': 127200, 'default': {'policy_loss': -0.13325509428977966, 'vf_explained_var': 0.8773702383041382, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 22.744340896606445, 'entropy': 16.201231002807617, 'kl': 0.016619432717561722, 'total_loss': 22.627914428710938}, 'sample_time_ms': 242611.106, 'grad_time_ms': 697.237, 'load_time_ms': 1.401, 'update_time_ms': 2.481}",127200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},127200,cda-server-3,-52.914738431937806
+7ffa6ff4607a442eb508661143530d5b,26360.525168180466,50.0,False,-82.16212772395187,107,2568,1756420778,-59.673469220947396,1566858,1200,2025-08-29_00-39-38,{},249.61818170547485,24,107,26360.525168180466,"{'num_steps_sampled': 128400, 'num_steps_trained': 128400, 'default': {'policy_loss': -0.12265331298112869, 'vf_explained_var': 0.8668314218521118, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 21.25311279296875, 'entropy': 16.13929557800293, 'kl': 0.017199309542775154, 'total_loss': 21.14787483215332}, 'sample_time_ms': 241790.366, 'grad_time_ms': 697.759, 'load_time_ms': 1.38, 'update_time_ms': 2.478}",128400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},128400,cda-server-3,-51.02603246046728
+7ffa6ff4607a442eb508661143530d5b,26604.6365506649,50.0,False,-82.16212772395187,108,2592,1756421022,-59.606878303662,1566858,1200,2025-08-29_00-43-42,{},244.11138248443604,24,108,26604.6365506649,"{'num_steps_sampled': 129600, 'num_steps_trained': 129600, 'default': {'policy_loss': -0.13076123595237732, 'vf_explained_var': 0.8132724761962891, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 33.19261169433594, 'entropy': 16.225126266479492, 'kl': 0.01657184027135372, 'total_loss': 33.07863235473633}, 'sample_time_ms': 246144.027, 'grad_time_ms': 698.614, 'load_time_ms': 1.333, 'update_time_ms': 2.485}",129600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},129600,cda-server-3,-51.02603246046728
+7ffa6ff4607a442eb508661143530d5b,26834.84356546402,50.0,False,-84.93840741162363,109,2616,1756421252,-59.694966777893185,1566858,1200,2025-08-29_00-47-32,{},230.20701479911804,24,109,26834.84356546402,"{'num_steps_sampled': 130800, 'num_steps_trained': 130800, 'default': {'policy_loss': -0.12276914715766907, 'vf_explained_var': 0.85801762342453, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 28.201007843017578, 'entropy': 16.107158660888672, 'kl': 0.015431146137416363, 'total_loss': 28.093862533569336}, 'sample_time_ms': 242041.052, 'grad_time_ms': 698.686, 'load_time_ms': 1.388, 'update_time_ms': 2.481}",130800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},130800,cda-server-3,-50.069767460137605
+7ffa6ff4607a442eb508661143530d5b,27092.147441625595,50.0,False,-84.93840741162363,110,2640,1756421509,-59.04001522812641,1566858,1200,2025-08-29_00-51-49,{},257.3038761615753,24,110,27092.147441625595,"{'num_steps_sampled': 132000, 'num_steps_trained': 132000, 'default': {'policy_loss': -0.13041992485523224, 'vf_explained_var': 0.8788143396377563, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 20.625926971435547, 'entropy': 16.039676666259766, 'kl': 0.017292585223913193, 'total_loss': 20.513015747070312}, 'sample_time_ms': 242737.964, 'grad_time_ms': 699.245, 'load_time_ms': 1.426, 'update_time_ms': 2.5}",132000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},132000,cda-server-3,-50.069767460137605
+7ffa6ff4607a442eb508661143530d5b,27331.856004953384,50.0,False,-84.93840741162363,111,2664,1756421749,-59.07273972534611,1566858,1200,2025-08-29_00-55-49,{},239.7085633277893,24,111,27331.856004953384,"{'num_steps_sampled': 133200, 'num_steps_trained': 133200, 'default': {'policy_loss': -0.12182916700839996, 'vf_explained_var': 0.8311696648597717, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 29.08080291748047, 'entropy': 16.15522003173828, 'kl': 0.016714682802557945, 'total_loss': 28.97589874267578}, 'sample_time_ms': 240394.014, 'grad_time_ms': 698.393, 'load_time_ms': 1.415, 'update_time_ms': 2.512}",133200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},133200,cda-server-3,-50.069767460137605
+7ffa6ff4607a442eb508661143530d5b,27587.61087012291,50.0,False,-86.78311202087484,112,2688,1756422005,-59.18881358171987,1566858,1200,2025-08-29_01-00-05,{},255.75486516952515,24,112,27587.61087012291,"{'num_steps_sampled': 134400, 'num_steps_trained': 134400, 'default': {'policy_loss': -0.140395849943161, 'vf_explained_var': 0.8482707738876343, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 28.473026275634766, 'entropy': 16.06305694580078, 'kl': 0.016294434666633606, 'total_loss': 28.3491268157959}, 'sample_time_ms': 241734.136, 'grad_time_ms': 698.843, 'load_time_ms': 1.411, 'update_time_ms': 2.56}",134400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},134400,cda-server-3,-50.069767460137605
+7ffa6ff4607a442eb508661143530d5b,27806.619978904724,50.0,False,-86.78311202087484,113,2712,1756422224,-58.635297871876844,1566858,1200,2025-08-29_01-03-44,{},219.00910878181458,24,113,27806.619978904724,"{'num_steps_sampled': 135600, 'num_steps_trained': 135600, 'default': {'policy_loss': -0.1381041407585144, 'vf_explained_var': 0.9284831285476685, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 11.675448417663574, 'entropy': 16.15050506591797, 'kl': 0.016876710578799248, 'total_loss': 11.554431915283203}, 'sample_time_ms': 236379.743, 'grad_time_ms': 697.874, 'load_time_ms': 1.368, 'update_time_ms': 2.548}",135600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},135600,cda-server-3,-52.83280264414459
+7ffa6ff4607a442eb508661143530d5b,28034.539868831635,50.0,False,-92.99670859655961,114,2736,1756422452,-58.661219019914526,1566858,1200,2025-08-29_01-07-32,{},227.9198899269104,24,114,28034.539868831635,"{'num_steps_sampled': 136800, 'num_steps_trained': 136800, 'default': {'policy_loss': -0.13784296810626984, 'vf_explained_var': 0.8285303115844727, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 34.57063293457031, 'entropy': 16.13674545288086, 'kl': 0.016130059957504272, 'total_loss': 34.44912338256836}, 'sample_time_ms': 236162.246, 'grad_time_ms': 696.885, 'load_time_ms': 1.405, 'update_time_ms': 2.553}",136800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},136800,cda-server-3,-52.83280264414459
+7ffa6ff4607a442eb508661143530d5b,28336.070405721664,50.0,False,-92.99670859655961,115,2760,1756422753,-58.39664888282129,1566858,1200,2025-08-29_01-12-33,{},301.5305368900299,24,115,28336.070405721664,"{'num_steps_sampled': 138000, 'num_steps_trained': 138000, 'default': {'policy_loss': -0.12080780416727066, 'vf_explained_var': 0.8865867853164673, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 17.852455139160156, 'entropy': 16.0211238861084, 'kl': 0.015340049751102924, 'total_loss': 17.747180938720703}, 'sample_time_ms': 243697.146, 'grad_time_ms': 697.206, 'load_time_ms': 1.493, 'update_time_ms': 2.538}",138000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},138000,cda-server-3,-51.878619471983534
+7ffa6ff4607a442eb508661143530d5b,28640.148250341415,50.0,False,-92.99670859655961,116,2784,1756423057,-57.572993058078616,1566858,1200,2025-08-29_01-17-37,{},304.077844619751,24,116,28640.148250341415,"{'num_steps_sampled': 139200, 'num_steps_trained': 139200, 'default': {'policy_loss': -0.1292750984430313, 'vf_explained_var': 0.8631255030632019, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 22.42864227294922, 'entropy': 16.13391876220703, 'kl': 0.01578795537352562, 'total_loss': 22.315351486206055}, 'sample_time_ms': 252216.644, 'grad_time_ms': 698.308, 'load_time_ms': 1.533, 'update_time_ms': 2.518}",139200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},139200,cda-server-3,-51.878619471983534
+7ffa6ff4607a442eb508661143530d5b,28893.488532304764,50.0,False,-92.99670859655961,117,2808,1756423311,-58.42551707762653,1566858,1200,2025-08-29_01-21-51,{},253.3402819633484,24,117,28893.488532304764,"{'num_steps_sampled': 140400, 'num_steps_trained': 140400, 'default': {'policy_loss': -0.1352321207523346, 'vf_explained_var': 0.8869233727455139, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 21.718400955200195, 'entropy': 16.096532821655273, 'kl': 0.014851750805974007, 'total_loss': 21.59820556640625}, 'sample_time_ms': 252588.37, 'grad_time_ms': 698.676, 'load_time_ms': 1.574, 'update_time_ms': 2.512}",140400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},140400,cda-server-3,-51.878619471983534
+7ffa6ff4607a442eb508661143530d5b,29127.301443338394,50.0,False,-88.65954468392255,118,2832,1756423544,-58.24708847794195,1566858,1200,2025-08-29_01-25-44,{},233.81291103363037,24,118,29127.301443338394,"{'num_steps_sampled': 141600, 'num_steps_trained': 141600, 'default': {'policy_loss': -0.13132750988006592, 'vf_explained_var': 0.7502151727676392, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 54.89358901977539, 'entropy': 16.050901412963867, 'kl': 0.015956096351146698, 'total_loss': 54.778411865234375}, 'sample_time_ms': 251559.104, 'grad_time_ms': 698.078, 'load_time_ms': 1.579, 'update_time_ms': 2.541}",141600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},141600,cda-server-3,-51.86353434737764
+7ffa6ff4607a442eb508661143530d5b,29348.139184951782,50.0,False,-88.65954468392255,119,2856,1756423765,-58.77166939777696,1566858,1200,2025-08-29_01-29-25,{},220.83774161338806,24,119,29348.139184951782,"{'num_steps_sampled': 142800, 'num_steps_trained': 142800, 'default': {'policy_loss': -0.1422090232372284, 'vf_explained_var': 0.9099141359329224, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 15.585625648498535, 'entropy': 15.948657989501953, 'kl': 0.01609078049659729, 'total_loss': 15.459708213806152}, 'sample_time_ms': 250621.264, 'grad_time_ms': 699.035, 'load_time_ms': 1.565, 'update_time_ms': 2.559}",142800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},142800,cda-server-3,-51.86353434737764
+7ffa6ff4607a442eb508661143530d5b,29608.62323451042,50.0,False,-88.65954468392255,120,2880,1756424026,-59.42504088928788,1566858,1200,2025-08-29_01-33-46,{},260.4840495586395,24,120,29608.62323451042,"{'num_steps_sampled': 144000, 'num_steps_trained': 144000, 'default': {'policy_loss': -0.1321364790201187, 'vf_explained_var': 0.8487840890884399, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 26.277753829956055, 'entropy': 15.773978233337402, 'kl': 0.01596074178814888, 'total_loss': 26.16177749633789}, 'sample_time_ms': 250940.818, 'grad_time_ms': 697.606, 'load_time_ms': 1.492, 'update_time_ms': 2.549}",144000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},144000,cda-server-3,-51.15904062506867
+7ffa6ff4607a442eb508661143530d5b,29864.01040172577,50.0,False,-105.43882619369613,121,2904,1756424281,-59.08303499192223,1566858,1200,2025-08-29_01-38-01,{},255.3871672153473,24,121,29864.01040172577,"{'num_steps_sampled': 145200, 'num_steps_trained': 145200, 'default': {'policy_loss': -0.11356958746910095, 'vf_explained_var': 0.7982986569404602, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 45.23063278198242, 'entropy': 15.830936431884766, 'kl': 0.01379266008734703, 'total_loss': 45.13102722167969}, 'sample_time_ms': 252508.147, 'grad_time_ms': 698.153, 'load_time_ms': 1.499, 'update_time_ms': 2.525}",145200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},145200,cda-server-3,-51.15904062506867
+7ffa6ff4607a442eb508661143530d5b,30100.95377969742,50.0,False,-105.43882619369613,122,2928,1756424518,-58.620452296311754,1566858,1200,2025-08-29_01-41-58,{},236.94337797164917,24,122,30100.95377969742,"{'num_steps_sampled': 146400, 'num_steps_trained': 146400, 'default': {'policy_loss': -0.14042048156261444, 'vf_explained_var': 0.9276683330535889, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 10.557514190673828, 'entropy': 15.92667293548584, 'kl': 0.016386190429329872, 'total_loss': 10.433683395385742}, 'sample_time_ms': 250627.109, 'grad_time_ms': 698.127, 'load_time_ms': 1.5, 'update_time_ms': 2.486}",146400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},146400,cda-server-3,-51.15904062506867
+7ffa6ff4607a442eb508661143530d5b,30326.80412006378,50.0,False,-105.43882619369613,123,2952,1756424744,-58.52559615811242,1566858,1200,2025-08-29_01-45-44,{},225.85034036636353,24,123,30326.80412006378,"{'num_steps_sampled': 147600, 'num_steps_trained': 147600, 'default': {'policy_loss': -0.13525259494781494, 'vf_explained_var': 0.8784549236297607, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 20.95810317993164, 'entropy': 15.93128776550293, 'kl': 0.014947210438549519, 'total_loss': 20.837984085083008}, 'sample_time_ms': 251310.891, 'grad_time_ms': 698.384, 'load_time_ms': 1.545, 'update_time_ms': 2.509}",147600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},147600,cda-server-3,-51.15904062506867
+7ffa6ff4607a442eb508661143530d5b,30578.466166734695,50.0,False,-105.43882619369613,124,2976,1756424996,-57.50727125609862,1566858,1200,2025-08-29_01-49-56,{},251.6620466709137,24,124,30578.466166734695,"{'num_steps_sampled': 148800, 'num_steps_trained': 148800, 'default': {'policy_loss': -0.1361684650182724, 'vf_explained_var': 0.8873589634895325, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 18.034313201904297, 'entropy': 15.900761604309082, 'kl': 0.01829693093895912, 'total_loss': 17.916669845581055}, 'sample_time_ms': 253684.863, 'grad_time_ms': 698.555, 'load_time_ms': 1.553, 'update_time_ms': 2.52}",148800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},148800,cda-server-3,-51.15904062506867
+7ffa6ff4607a442eb508661143530d5b,30794.069765806198,50.0,False,-86.28003722489746,125,3000,1756425211,-56.68986158951923,1566858,1200,2025-08-29_01-53-31,{},215.60359907150269,24,125,30794.069765806198,"{'num_steps_sampled': 150000, 'num_steps_trained': 150000, 'default': {'policy_loss': -0.14660833775997162, 'vf_explained_var': 0.9275010228157043, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 11.063407897949219, 'entropy': 15.772184371948242, 'kl': 0.016784558072686195, 'total_loss': 10.933794021606445}, 'sample_time_ms': 245091.181, 'grad_time_ms': 699.565, 'load_time_ms': 1.515, 'update_time_ms': 2.549}",150000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},150000,cda-server-3,-51.562097171397795
+7ffa6ff4607a442eb508661143530d5b,31028.55344748497,50.0,False,-86.28003722489746,126,3024,1756425446,-56.94375005421898,1566858,1200,2025-08-29_01-57-26,{},234.48368167877197,24,126,31028.55344748497,"{'num_steps_sampled': 151200, 'num_steps_trained': 151200, 'default': {'policy_loss': -0.12616945803165436, 'vf_explained_var': 0.9062788486480713, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 14.618449211120605, 'entropy': 15.753960609436035, 'kl': 0.017071321606636047, 'total_loss': 14.509563446044922}, 'sample_time_ms': 238132.026, 'grad_time_ms': 699.342, 'load_time_ms': 1.5, 'update_time_ms': 2.547}",151200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},151200,cda-server-3,-51.562097171397795
+7ffa6ff4607a442eb508661143530d5b,31243.675163269043,50.0,False,-80.45088177963235,127,3048,1756425661,-56.65103369765881,1566858,1200,2025-08-29_02-01-01,{},215.12171578407288,24,127,31243.675163269043,"{'num_steps_sampled': 152400, 'num_steps_trained': 152400, 'default': {'policy_loss': -0.11479911208152771, 'vf_explained_var': 0.7940958738327026, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 36.7393913269043, 'entropy': 15.598017692565918, 'kl': 0.015916500240564346, 'total_loss': 36.640708923339844}, 'sample_time_ms': 234310.54, 'grad_time_ms': 699.01, 'load_time_ms': 1.489, 'update_time_ms': 2.553}",152400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},152400,cda-server-3,-51.562097171397795
+7ffa6ff4607a442eb508661143530d5b,31459.257354974747,50.0,False,-79.52035184489046,128,3072,1756425876,-56.38341833475086,1566858,1200,2025-08-29_02-04-36,{},215.58219170570374,24,128,31459.257354974747,"{'num_steps_sampled': 153600, 'num_steps_trained': 153600, 'default': {'policy_loss': -0.14602722227573395, 'vf_explained_var': 0.9135898947715759, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 12.709991455078125, 'entropy': 15.712790489196777, 'kl': 0.017770998179912567, 'total_loss': 12.58195686340332}, 'sample_time_ms': 232487.546, 'grad_time_ms': 698.863, 'load_time_ms': 1.556, 'update_time_ms': 2.519}",153600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},153600,cda-server-3,-51.562097171397795
+7ffa6ff4607a442eb508661143530d5b,31731.1027405262,50.0,False,-79.52035184489046,129,3096,1756426148,-56.568582278886524,1566858,1200,2025-08-29_02-09-08,{},271.84538555145264,24,129,31731.1027405262,"{'num_steps_sampled': 154800, 'num_steps_trained': 154800, 'default': {'policy_loss': -0.12171263247728348, 'vf_explained_var': 0.8592672348022461, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 21.35623550415039, 'entropy': 15.582194328308105, 'kl': 0.016301354393363, 'total_loss': 21.251028060913086}, 'sample_time_ms': 237589.432, 'grad_time_ms': 697.683, 'load_time_ms': 1.595, 'update_time_ms': 2.499}",154800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},154800,cda-server-3,-51.830023605268046
+7ffa6ff4607a442eb508661143530d5b,31959.038396835327,50.0,False,-79.52035184489046,130,3120,1756426376,-56.192528320350384,1566858,1200,2025-08-29_02-12-56,{},227.9356563091278,24,130,31959.038396835327,"{'num_steps_sampled': 156000, 'num_steps_trained': 156000, 'default': {'policy_loss': -0.12034373730421066, 'vf_explained_var': 0.9029307961463928, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 14.961315155029297, 'entropy': 15.58940315246582, 'kl': 0.01572321727871895, 'total_loss': 14.856891632080078}, 'sample_time_ms': 234334.761, 'grad_time_ms': 697.659, 'load_time_ms': 1.537, 'update_time_ms': 2.496}",156000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},156000,cda-server-3,-51.19035379947645
+7ffa6ff4607a442eb508661143530d5b,32195.998419046402,50.0,False,-70.91395079921254,131,3144,1756426613,-55.76523066885816,1566858,1200,2025-08-29_02-16-53,{},236.96002221107483,24,131,32195.998419046402,"{'num_steps_sampled': 157200, 'num_steps_trained': 157200, 'default': {'policy_loss': -0.13559547066688538, 'vf_explained_var': 0.9158918857574463, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 12.444634437561035, 'entropy': 15.584784507751465, 'kl': 0.016098035499453545, 'total_loss': 12.325338363647461}, 'sample_time_ms': 232491.655, 'grad_time_ms': 697.978, 'load_time_ms': 1.559, 'update_time_ms': 2.546}",157200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},157200,cda-server-3,-51.09991333542589
+7ffa6ff4607a442eb508661143530d5b,32449.557423353195,50.0,False,-70.91395079921254,132,3168,1756426867,-55.86116142206185,1566858,1200,2025-08-29_02-21-07,{},253.5590043067932,24,132,32449.557423353195,"{'num_steps_sampled': 158400, 'num_steps_trained': 158400, 'default': {'policy_loss': -0.12703874707221985, 'vf_explained_var': 0.926753044128418, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 10.651493072509766, 'entropy': 15.580692291259766, 'kl': 0.016220103949308395, 'total_loss': 10.540875434875488}, 'sample_time_ms': 234153.813, 'grad_time_ms': 697.494, 'load_time_ms': 1.499, 'update_time_ms': 2.553}",158400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},158400,cda-server-3,-51.09991333542589
+7ffa6ff4607a442eb508661143530d5b,32706.75931406021,50.0,False,-93.98838629496043,133,3192,1756427124,-56.2012627641873,1566858,1200,2025-08-29_02-25-24,{},257.201890707016,24,133,32706.75931406021,"{'num_steps_sampled': 159600, 'num_steps_trained': 159600, 'default': {'policy_loss': -0.1338438093662262, 'vf_explained_var': 0.8740109205245972, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 23.77425193786621, 'entropy': 15.500330924987793, 'kl': 0.015482652932405472, 'total_loss': 23.656084060668945}, 'sample_time_ms': 237288.091, 'grad_time_ms': 698.382, 'load_time_ms': 1.487, 'update_time_ms': 2.552}",159600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},159600,cda-server-3,-51.09991333542589
+7ffa6ff4607a442eb508661143530d5b,32972.81243276596,50.0,False,-93.98838629496043,134,3216,1756427390,-56.35912897300799,1566858,1200,2025-08-29_02-29-50,{},266.0531187057495,24,134,32972.81243276596,"{'num_steps_sampled': 160800, 'num_steps_trained': 160800, 'default': {'policy_loss': -0.13807255029678345, 'vf_explained_var': 0.9234582185745239, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 12.561636924743652, 'entropy': 15.425944328308105, 'kl': 0.01584099791944027, 'total_loss': 12.439602851867676}, 'sample_time_ms': 238727.197, 'grad_time_ms': 698.404, 'load_time_ms': 1.465, 'update_time_ms': 2.538}",160800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},160800,cda-server-3,-51.09991333542589
+7ffa6ff4607a442eb508661143530d5b,33229.83745789528,50.0,False,-93.98838629496043,135,3240,1756427647,-56.45386895016317,1566858,1200,2025-08-29_02-34-07,{},257.02502512931824,24,135,33229.83745789528,"{'num_steps_sampled': 162000, 'num_steps_trained': 162000, 'default': {'policy_loss': -0.12910763919353485, 'vf_explained_var': 0.9062867760658264, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 13.637471199035645, 'entropy': 15.366029739379883, 'kl': 0.01572471857070923, 'total_loss': 13.524285316467285}, 'sample_time_ms': 242870.088, 'grad_time_ms': 697.763, 'load_time_ms': 1.412, 'update_time_ms': 2.526}",162000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},162000,cda-server-3,-51.69027924314964
+7ffa6ff4607a442eb508661143530d5b,33496.372004032135,50.0,False,-96.18207900565379,136,3264,1756427914,-56.74371575273542,1566858,1200,2025-08-29_02-38-34,{},266.5345461368561,24,136,33496.372004032135,"{'num_steps_sampled': 163200, 'num_steps_trained': 163200, 'default': {'policy_loss': -0.12022534012794495, 'vf_explained_var': 0.8324052095413208, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 36.34650421142578, 'entropy': 15.361777305603027, 'kl': 0.013458560220897198, 'total_loss': 36.239906311035156}, 'sample_time_ms': 246076.3, 'grad_time_ms': 696.66, 'load_time_ms': 1.378, 'update_time_ms': 2.542}",163200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},163200,cda-server-3,-51.69027924314964
+7ffa6ff4607a442eb508661143530d5b,33734.41650533676,50.0,False,-96.18207900565379,137,3288,1756428152,-56.32144548975378,1566858,1200,2025-08-29_02-42-32,{},238.04450130462646,24,137,33734.41650533676,"{'num_steps_sampled': 164400, 'num_steps_trained': 164400, 'default': {'policy_loss': -0.14246992766857147, 'vf_explained_var': 0.8585296273231506, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 23.249908447265625, 'entropy': 15.368772506713867, 'kl': 0.0162531528621912, 'total_loss': 23.12389373779297}, 'sample_time_ms': 248368.943, 'grad_time_ms': 696.335, 'load_time_ms': 1.371, 'update_time_ms': 2.531}",164400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},164400,cda-server-3,-51.40951762538001
+7ffa6ff4607a442eb508661143530d5b,33984.96528124809,50.0,False,-96.18207900565379,138,3312,1756428402,-55.87355432583267,1566858,1200,2025-08-29_02-46-42,{},250.54877591133118,24,138,33984.96528124809,"{'num_steps_sampled': 165600, 'num_steps_trained': 165600, 'default': {'policy_loss': -0.1364402174949646, 'vf_explained_var': 0.9217305779457092, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 11.053638458251953, 'entropy': 15.37063980102539, 'kl': 0.016554994508624077, 'total_loss': 10.933959007263184}, 'sample_time_ms': 251865.386, 'grad_time_ms': 696.52, 'load_time_ms': 1.384, 'update_time_ms': 2.537}",165600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},165600,cda-server-3,-51.40951762538001
+7ffa6ff4607a442eb508661143530d5b,34215.42980790138,50.0,False,-96.18207900565379,139,3336,1756428633,-55.7571264546207,1566858,1200,2025-08-29_02-50-33,{},230.4645266532898,24,139,34215.42980790138,"{'num_steps_sampled': 166800, 'num_steps_trained': 166800, 'default': {'policy_loss': -0.11645391583442688, 'vf_explained_var': 0.9018339514732361, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 14.361066818237305, 'entropy': 15.37358570098877, 'kl': 0.014754108153283596, 'total_loss': 14.259552001953125}, 'sample_time_ms': 247727.037, 'grad_time_ms': 696.858, 'load_time_ms': 1.332, 'update_time_ms': 2.536}",166800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},166800,cda-server-3,-51.40951762538001
+7ffa6ff4607a442eb508661143530d5b,34444.063520908356,50.0,False,-82.93349279790904,140,3360,1756428861,-55.31171767952917,1566858,1200,2025-08-29_02-54-21,{},228.63371300697327,24,140,34444.063520908356,"{'num_steps_sampled': 168000, 'num_steps_trained': 168000, 'default': {'policy_loss': -0.1265363097190857, 'vf_explained_var': 0.9019301533699036, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 15.231989860534668, 'entropy': 15.134800910949707, 'kl': 0.015528642572462559, 'total_loss': 15.121174812316895}, 'sample_time_ms': 247796.422, 'grad_time_ms': 697.2, 'load_time_ms': 1.367, 'update_time_ms': 2.552}",168000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},168000,cda-server-3,-51.40951762538001
+7ffa6ff4607a442eb508661143530d5b,34734.807121276855,50.0,False,-82.93349279790904,141,3384,1756429152,-55.52611278640221,1566858,1200,2025-08-29_02-59-12,{},290.74360036849976,24,141,34734.807121276855,"{'num_steps_sampled': 169200, 'num_steps_trained': 169200, 'default': {'policy_loss': -0.12432999163866043, 'vf_explained_var': 0.8699341416358948, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 20.217525482177734, 'entropy': 15.227035522460938, 'kl': 0.015340043231844902, 'total_loss': 20.108726501464844}, 'sample_time_ms': 253175.993, 'grad_time_ms': 695.989, 'load_time_ms': 1.347, 'update_time_ms': 2.547}",169200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},169200,cda-server-3,-51.912489943053544
+7ffa6ff4607a442eb508661143530d5b,35009.46830415726,50.0,False,-68.6920622405596,142,3408,1756429427,-55.2943012698868,1566858,1200,2025-08-29_03-03-47,{},274.6611828804016,24,142,35009.46830415726,"{'num_steps_sampled': 170400, 'num_steps_trained': 170400, 'default': {'policy_loss': -0.12539464235305786, 'vf_explained_var': 0.8912346959114075, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 16.17151641845703, 'entropy': 15.258182525634766, 'kl': 0.016979189589619637, 'total_loss': 16.063312530517578}, 'sample_time_ms': 255286.282, 'grad_time_ms': 695.94, 'load_time_ms': 1.322, 'update_time_ms': 2.54}",170400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},170400,cda-server-3,-48.129169098879075
+7ffa6ff4607a442eb508661143530d5b,35289.0909883976,50.0,False,-72.46281944527702,143,3432,1756429706,-55.369224463036765,1566858,1200,2025-08-29_03-08-26,{},279.6226842403412,24,143,35289.0909883976,"{'num_steps_sampled': 171600, 'num_steps_trained': 171600, 'default': {'policy_loss': -0.1365930438041687, 'vf_explained_var': 0.912811279296875, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 13.83117389678955, 'entropy': 15.256481170654297, 'kl': 0.017149154096841812, 'total_loss': 13.711945533752441}, 'sample_time_ms': 257528.771, 'grad_time_ms': 695.525, 'load_time_ms': 1.324, 'update_time_ms': 2.547}",171600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},171600,cda-server-3,-48.129169098879075
+7ffa6ff4607a442eb508661143530d5b,35545.71752953529,50.0,False,-75.52954077212628,144,3456,1756429963,-55.29145726709157,1566858,1200,2025-08-29_03-12-43,{},256.6265411376953,24,144,35545.71752953529,"{'num_steps_sampled': 172800, 'num_steps_trained': 172800, 'default': {'policy_loss': -0.14175564050674438, 'vf_explained_var': 0.9073739647865295, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 15.246339797973633, 'entropy': 15.155366897583008, 'kl': 0.015908382833003998, 'total_loss': 15.120692253112793}, 'sample_time_ms': 256586.634, 'grad_time_ms': 694.93, 'load_time_ms': 1.436, 'update_time_ms': 2.567}",172800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},172800,cda-server-3,-48.129169098879075
+7ffa6ff4607a442eb508661143530d5b,35821.987554073334,50.0,False,-75.52954077212628,145,3480,1756430239,-54.90900760315747,1566858,1200,2025-08-29_03-17-19,{},276.27002453804016,24,145,35821.987554073334,"{'num_steps_sampled': 174000, 'num_steps_trained': 174000, 'default': {'policy_loss': -0.1382271647453308, 'vf_explained_var': 0.9090858101844788, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 13.077757835388184, 'entropy': 15.22294807434082, 'kl': 0.017670560628175735, 'total_loss': 12.95742130279541}, 'sample_time_ms': 258511.631, 'grad_time_ms': 694.44, 'load_time_ms': 1.449, 'update_time_ms': 2.561}",174000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},174000,cda-server-3,-48.129169098879075
+7ffa6ff4607a442eb508661143530d5b,36097.462760448456,50.0,False,-79.94967820860617,146,3504,1756430515,-55.34494379447178,1566858,1200,2025-08-29_03-21-55,{},275.47520637512207,24,146,36097.462760448456,"{'num_steps_sampled': 175200, 'num_steps_trained': 175200, 'default': {'policy_loss': -0.13017256557941437, 'vf_explained_var': 0.8144359588623047, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 34.2513542175293, 'entropy': 15.095757484436035, 'kl': 0.014651145786046982, 'total_loss': 34.136016845703125}, 'sample_time_ms': 259405.121, 'grad_time_ms': 694.972, 'load_time_ms': 1.499, 'update_time_ms': 2.528}",175200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},175200,cda-server-3,-49.86107777805505
+7ffa6ff4607a442eb508661143530d5b,36382.53430700302,50.0,False,-79.94967820860617,147,3528,1756430800,-55.20145012855225,1566858,1200,2025-08-29_03-26-40,{},285.07154655456543,24,147,36382.53430700302,"{'num_steps_sampled': 176400, 'num_steps_trained': 176400, 'default': {'policy_loss': -0.14096269011497498, 'vf_explained_var': 0.9250853657722473, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 10.303423881530762, 'entropy': 15.09123420715332, 'kl': 0.01631304621696472, 'total_loss': 10.178977966308594}, 'sample_time_ms': 264107.325, 'grad_time_ms': 695.414, 'load_time_ms': 1.51, 'update_time_ms': 2.538}",176400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},176400,cda-server-3,-49.86107777805505
+7ffa6ff4607a442eb508661143530d5b,36627.90810227394,50.0,False,-79.94967820860617,148,3552,1756431045,-55.17519078805936,1566858,1200,2025-08-29_03-30-45,{},245.3737952709198,24,148,36627.90810227394,"{'num_steps_sampled': 177600, 'num_steps_trained': 177600, 'default': {'policy_loss': -0.13445059955120087, 'vf_explained_var': 0.8926759362220764, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 16.581031799316406, 'entropy': 14.917000770568848, 'kl': 0.017347920686006546, 'total_loss': 16.46414566040039}, 'sample_time_ms': 263589.876, 'grad_time_ms': 695.333, 'load_time_ms': 1.522, 'update_time_ms': 2.577}",177600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},177600,cda-server-3,-49.868294210863574
+7ffa6ff4607a442eb508661143530d5b,36868.36815214157,50.0,False,-83.15647612467512,149,3576,1756431286,-55.496444152964315,1566858,1200,2025-08-29_03-34-46,{},240.46004986763,24,149,36868.36815214157,"{'num_steps_sampled': 178800, 'num_steps_trained': 178800, 'default': {'policy_loss': -0.1339377909898758, 'vf_explained_var': 0.8692839741706848, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 22.630189895629883, 'entropy': 14.89309310913086, 'kl': 0.014757196418941021, 'total_loss': 22.511194229125977}, 'sample_time_ms': 264588.746, 'grad_time_ms': 695.936, 'load_time_ms': 1.571, 'update_time_ms': 2.617}",178800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},178800,cda-server-3,-49.868294210863574
+7ffa6ff4607a442eb508661143530d5b,37109.659499168396,50.0,False,-83.15647612467512,150,3600,1756431527,-54.72428538909617,1566858,1200,2025-08-29_03-38-47,{},241.29134702682495,24,150,37109.659499168396,"{'num_steps_sampled': 180000, 'num_steps_trained': 180000, 'default': {'policy_loss': -0.12940925359725952, 'vf_explained_var': 0.9131262302398682, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 14.03437614440918, 'entropy': 14.959555625915527, 'kl': 0.016797177493572235, 'total_loss': 13.92197322845459}, 'sample_time_ms': 265853.553, 'grad_time_ms': 696.881, 'load_time_ms': 1.582, 'update_time_ms': 2.599}",180000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},180000,cda-server-3,-50.103271334104306
+7ffa6ff4607a442eb508661143530d5b,37372.81824541092,50.0,False,-83.15647612467512,151,3624,1756431790,-54.95631083997977,1566858,1200,2025-08-29_03-43-10,{},263.1587462425232,24,151,37372.81824541092,"{'num_steps_sampled': 181200, 'num_steps_trained': 181200, 'default': {'policy_loss': -0.13383673131465912, 'vf_explained_var': 0.9046041965484619, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 14.824883460998535, 'entropy': 15.047348022460938, 'kl': 0.015503380447626114, 'total_loss': 14.706741333007812}, 'sample_time_ms': 263094.858, 'grad_time_ms': 697.053, 'load_time_ms': 1.576, 'update_time_ms': 2.585}",181200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},181200,cda-server-3,-50.103271334104306
+7ffa6ff4607a442eb508661143530d5b,37592.00878381729,50.0,False,-83.15647612467512,152,3648,1756432009,-55.042814484307165,1566858,1200,2025-08-29_03-46-49,{},219.19053840637207,24,152,37592.00878381729,"{'num_steps_sampled': 182400, 'num_steps_trained': 182400, 'default': {'policy_loss': -0.14624041318893433, 'vf_explained_var': 0.8944531679153442, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 18.525949478149414, 'entropy': 15.080928802490234, 'kl': 0.017322639003396034, 'total_loss': 18.397249221801758}, 'sample_time_ms': 257547.52, 'grad_time_ms': 697.159, 'load_time_ms': 1.677, 'update_time_ms': 2.615}",182400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},182400,cda-server-3,-50.103271334104306
+7ffa6ff4607a442eb508661143530d5b,37824.8251748085,50.0,False,-76.25276652916195,153,3672,1756432242,-54.80760574942528,1566858,1200,2025-08-29_03-50-42,{},232.81639099121094,24,153,37824.8251748085,"{'num_steps_sampled': 183600, 'num_steps_trained': 183600, 'default': {'policy_loss': -0.12011555582284927, 'vf_explained_var': 0.9176934361457825, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 13.568841934204102, 'entropy': 14.749469757080078, 'kl': 0.015085036866366863, 'total_loss': 13.464000701904297}, 'sample_time_ms': 252867.137, 'grad_time_ms': 696.913, 'load_time_ms': 1.679, 'update_time_ms': 2.613}",183600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},183600,cda-server-3,-51.635439929331795
+7ffa6ff4607a442eb508661143530d5b,38087.21182632446,50.0,False,-76.25276652916195,154,3696,1756432505,-54.710018271540406,1566858,1200,2025-08-29_03-55-05,{},262.3866515159607,24,154,38087.21182632446,"{'num_steps_sampled': 184800, 'num_steps_trained': 184800, 'default': {'policy_loss': -0.11606475710868835, 'vf_explained_var': 0.9142285585403442, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 11.576321601867676, 'entropy': 14.832953453063965, 'kl': 0.01606798730790615, 'total_loss': 11.476527214050293}, 'sample_time_ms': 253442.364, 'grad_time_ms': 697.813, 'load_time_ms': 1.578, 'update_time_ms': 2.592}",184800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},184800,cda-server-3,-51.69439838421866
+7ffa6ff4607a442eb508661143530d5b,38328.99079108238,50.0,False,-71.88802226923642,155,3720,1756432746,-54.24950328876382,1566858,1200,2025-08-29_03-59-06,{},241.7789647579193,24,155,38328.99079108238,"{'num_steps_sampled': 186000, 'num_steps_trained': 186000, 'default': {'policy_loss': -0.12019169330596924, 'vf_explained_var': 0.9199265241622925, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 10.817547798156738, 'entropy': 14.937190055847168, 'kl': 0.0172748900949955, 'total_loss': 10.714847564697266}, 'sample_time_ms': 249993.188, 'grad_time_ms': 697.826, 'load_time_ms': 1.591, 'update_time_ms': 2.591}",186000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},186000,cda-server-3,-51.69439838421866
+7ffa6ff4607a442eb508661143530d5b,38605.58489322662,50.0,False,-60.94899705446273,156,3744,1756433023,-53.82121373845912,1566858,1200,2025-08-29_04-03-43,{},276.59410214424133,24,156,38605.58489322662,"{'num_steps_sampled': 187200, 'num_steps_trained': 187200, 'default': {'policy_loss': -0.11848673224449158, 'vf_explained_var': 0.9233921766281128, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 11.039652824401855, 'entropy': 14.721104621887207, 'kl': 0.016296055167913437, 'total_loss': 10.937665939331055}, 'sample_time_ms': 250104.087, 'grad_time_ms': 698.824, 'load_time_ms': 1.582, 'update_time_ms': 2.591}",187200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},187200,cda-server-3,-51.69439838421866
+7ffa6ff4607a442eb508661143530d5b,38854.769364118576,50.0,False,-87.87251747175968,157,3768,1756433272,-54.420660136849435,1566858,1200,2025-08-29_04-07-52,{},249.18447089195251,24,157,38854.769364118576,"{'num_steps_sampled': 188400, 'num_steps_trained': 188400, 'default': {'policy_loss': -0.11602246761322021, 'vf_explained_var': 0.8534746766090393, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 34.05124282836914, 'entropy': 14.746952056884766, 'kl': 0.014781979843974113, 'total_loss': 33.9501838684082}, 'sample_time_ms': 246516.264, 'grad_time_ms': 697.965, 'load_time_ms': 1.588, 'update_time_ms': 2.589}",188400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},188400,cda-server-3,-51.19677159146877
+7ffa6ff4607a442eb508661143530d5b,39079.19603562355,50.0,False,-87.87251747175968,158,3792,1756433497,-54.49077811088377,1566858,1200,2025-08-29_04-11-37,{},224.42667150497437,24,158,39079.19603562355,"{'num_steps_sampled': 189600, 'num_steps_trained': 189600, 'default': {'policy_loss': -0.1355181485414505, 'vf_explained_var': 0.9379551410675049, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 9.295769691467285, 'entropy': 14.72548770904541, 'kl': 0.015183514915406704, 'total_loss': 9.17562484741211}, 'sample_time_ms': 244421.345, 'grad_time_ms': 698.161, 'load_time_ms': 1.564, 'update_time_ms': 2.575}",189600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},189600,cda-server-3,-51.19677159146877
+7ffa6ff4607a442eb508661143530d5b,39353.902054309845,50.0,False,-87.87251747175968,159,3816,1756433771,-54.94807630013864,1566858,1200,2025-08-29_04-16-11,{},274.70601868629456,24,159,39353.902054309845,"{'num_steps_sampled': 190800, 'num_steps_trained': 190800, 'default': {'policy_loss': -0.13363111019134521, 'vf_explained_var': 0.8988499045372009, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 15.78367805480957, 'entropy': 14.716657638549805, 'kl': 0.015655651688575745, 'total_loss': 15.665897369384766}, 'sample_time_ms': 247846.641, 'grad_time_ms': 697.463, 'load_time_ms': 1.576, 'update_time_ms': 2.555}",190800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},190800,cda-server-3,-51.19677159146877
+7ffa6ff4607a442eb508661143530d5b,39582.731301784515,50.0,False,-87.87251747175968,160,3840,1756434000,-54.99390824289015,1566858,1200,2025-08-29_04-20-00,{},228.8292474746704,24,160,39582.731301784515,"{'num_steps_sampled': 192000, 'num_steps_trained': 192000, 'default': {'policy_loss': -0.13071568310260773, 'vf_explained_var': 0.8984204530715942, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 14.689178466796875, 'entropy': 14.589058876037598, 'kl': 0.016704510897397995, 'total_loss': 14.575374603271484}, 'sample_time_ms': 246600.492, 'grad_time_ms': 697.379, 'load_time_ms': 1.582, 'update_time_ms': 2.557}",192000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},192000,cda-server-3,-51.07453569163501
+7ffa6ff4607a442eb508661143530d5b,39832.147840976715,50.0,False,-72.07512178954435,161,3864,1756434250,-54.470317514482815,1566858,1200,2025-08-29_04-24-10,{},249.4165391921997,24,161,39832.147840976715,"{'num_steps_sampled': 193200, 'num_steps_trained': 193200, 'default': {'policy_loss': -0.12139880657196045, 'vf_explained_var': 0.9051095247268677, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 13.678767204284668, 'entropy': 14.655537605285645, 'kl': 0.015876276418566704, 'total_loss': 13.573442459106445}, 'sample_time_ms': 245226.549, 'grad_time_ms': 697.164, 'load_time_ms': 1.581, 'update_time_ms': 2.564}",193200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},193200,cda-server-3,-49.92447552813607
+7ffa6ff4607a442eb508661143530d5b,40083.27506804466,50.0,False,-72.07512178954435,162,3888,1756434501,-54.33385886284182,1566858,1200,2025-08-29_04-28-21,{},251.1272270679474,24,162,40083.27506804466,"{'num_steps_sampled': 194400, 'num_steps_trained': 194400, 'default': {'policy_loss': -0.11639168858528137, 'vf_explained_var': 0.8642103672027588, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 18.328638076782227, 'entropy': 14.6741943359375, 'kl': 0.016865216195583344, 'total_loss': 18.22932243347168}, 'sample_time_ms': 248419.243, 'grad_time_ms': 698.138, 'load_time_ms': 1.585, 'update_time_ms': 2.543}",194400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},194400,cda-server-3,-49.92447552813607
+7ffa6ff4607a442eb508661143530d5b,40346.9060986042,50.0,False,-65.13800180278425,163,3912,1756434764,-53.99017250932294,1566858,1200,2025-08-29_04-32-44,{},263.6310305595398,24,163,40346.9060986042,"{'num_steps_sampled': 195600, 'num_steps_trained': 195600, 'default': {'policy_loss': -0.0998261496424675, 'vf_explained_var': 0.8992434740066528, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 13.860114097595215, 'entropy': 14.415943145751953, 'kl': 0.016176464036107063, 'total_loss': 13.776667594909668}, 'sample_time_ms': 251501.186, 'grad_time_ms': 697.607, 'load_time_ms': 1.609, 'update_time_ms': 2.552}",195600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},195600,cda-server-3,-49.92447552813607
+7ffa6ff4607a442eb508661143530d5b,40603.62238764763,50.0,False,-65.93216349559958,164,3936,1756435021,-53.87998544779606,1566858,1200,2025-08-29_04-37-01,{},256.7162890434265,24,164,40603.62238764763,"{'num_steps_sampled': 196800, 'num_steps_trained': 196800, 'default': {'policy_loss': -0.14119286835193634, 'vf_explained_var': 0.8982493281364441, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 14.692657470703125, 'entropy': 14.567020416259766, 'kl': 0.01636369712650776, 'total_loss': 14.568032264709473}, 'sample_time_ms': 250935.353, 'grad_time_ms': 696.368, 'load_time_ms': 1.616, 'update_time_ms': 2.555}",196800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},196800,cda-server-3,-49.92447552813607
+7ffa6ff4607a442eb508661143530d5b,40852.17313194275,50.0,False,-65.93216349559958,165,3960,1756435270,-53.61150029783123,1566858,1200,2025-08-29_04-41-10,{},248.55074429512024,24,165,40852.17313194275,"{'num_steps_sampled': 198000, 'num_steps_trained': 198000, 'default': {'policy_loss': -0.1300608515739441, 'vf_explained_var': 0.9516932368278503, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 6.523504257202148, 'entropy': 14.476093292236328, 'kl': 0.017224567010998726, 'total_loss': 6.410882949829102}, 'sample_time_ms': 251613.146, 'grad_time_ms': 695.773, 'load_time_ms': 1.634, 'update_time_ms': 2.543}",198000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},198000,cda-server-3,-50.33426657153577
+7ffa6ff4607a442eb508661143530d5b,41124.5479888916,50.0,False,-65.93216349559958,166,3984,1756435542,-53.50584114911244,1566858,1200,2025-08-29_04-45-42,{},272.37485694885254,24,166,41124.5479888916,"{'num_steps_sampled': 199200, 'num_steps_trained': 199200, 'default': {'policy_loss': -0.13436605036258698, 'vf_explained_var': 0.9507731199264526, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 6.58724308013916, 'entropy': 14.292543411254883, 'kl': 0.016184302046895027, 'total_loss': 6.469264030456543}, 'sample_time_ms': 251192.499, 'grad_time_ms': 694.461, 'load_time_ms': 1.646, 'update_time_ms': 2.536}",199200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},199200,cda-server-3,-50.143069802916855
+7ffa6ff4607a442eb508661143530d5b,41352.61390995979,50.0,False,-65.93216349559958,167,4008,1756435770,-53.56339851585321,1566858,1200,2025-08-29_04-49-30,{},228.06592106819153,24,167,41352.61390995979,"{'num_steps_sampled': 200400, 'num_steps_trained': 200400, 'default': {'policy_loss': -0.1314341276884079, 'vf_explained_var': 0.9463107585906982, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 7.555251121520996, 'entropy': 14.319255828857422, 'kl': 0.016974905505776405, 'total_loss': 7.441004276275635}, 'sample_time_ms': 249078.991, 'grad_time_ms': 696.199, 'load_time_ms': 1.574, 'update_time_ms': 2.54}",200400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},200400,cda-server-3,-50.143069802916855
+7ffa6ff4607a442eb508661143530d5b,41594.20011138916,50.0,False,-61.94201876237128,168,4032,1756436012,-53.30442686539963,1566858,1200,2025-08-29_04-53-32,{},241.58620142936707,24,168,41594.20011138916,"{'num_steps_sampled': 201600, 'num_steps_trained': 201600, 'default': {'policy_loss': -0.12491725385189056, 'vf_explained_var': 0.9548305869102478, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 6.068361759185791, 'entropy': 14.498003005981445, 'kl': 0.017109356820583344, 'total_loss': 5.96076774597168}, 'sample_time_ms': 250794.892, 'grad_time_ms': 696.286, 'load_time_ms': 1.578, 'update_time_ms': 2.534}",201600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},201600,cda-server-3,-50.01706107894995
+7ffa6ff4607a442eb508661143530d5b,41838.673221588135,50.0,False,-70.72055208052299,169,4056,1756436256,-53.589283976993016,1566858,1200,2025-08-29_04-57-36,{},244.4731101989746,24,169,41838.673221588135,"{'num_steps_sampled': 202800, 'num_steps_trained': 202800, 'default': {'policy_loss': -0.12954078614711761, 'vf_explained_var': 0.9378258585929871, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 9.549816131591797, 'entropy': 14.280044555664062, 'kl': 0.016108253970742226, 'total_loss': 9.43658447265625}, 'sample_time_ms': 247771.674, 'grad_time_ms': 696.205, 'load_time_ms': 1.54, 'update_time_ms': 2.535}",202800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},202800,cda-server-3,-50.01706107894995
+7ffa6ff4607a442eb508661143530d5b,42089.681601285934,50.0,False,-70.72055208052299,170,4080,1756436507,-53.57962096219589,1566858,1200,2025-08-29_05-01-47,{},251.00837969779968,24,170,42089.681601285934,"{'num_steps_sampled': 204000, 'num_steps_trained': 204000, 'default': {'policy_loss': -0.12321165949106216, 'vf_explained_var': 0.9296780824661255, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 10.322514533996582, 'entropy': 14.262398719787598, 'kl': 0.014291416853666306, 'total_loss': 10.21377182006836}, 'sample_time_ms': 249990.139, 'grad_time_ms': 695.714, 'load_time_ms': 1.488, 'update_time_ms': 2.565}",204000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},204000,cda-server-3,-49.0508869398342
+7ffa6ff4607a442eb508661143530d5b,42328.8942193985,50.0,False,-70.72055208052299,171,4104,1756436747,-53.66866427174036,1566858,1200,2025-08-29_05-05-47,{},239.2126181125641,24,171,42328.8942193985,"{'num_steps_sampled': 205200, 'num_steps_trained': 205200, 'default': {'policy_loss': -0.12857241928577423, 'vf_explained_var': 0.9397585988044739, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 8.941105842590332, 'entropy': 14.35903263092041, 'kl': 0.016312314197421074, 'total_loss': 8.82905101776123}, 'sample_time_ms': 248969.415, 'grad_time_ms': 696.245, 'load_time_ms': 1.408, 'update_time_ms': 2.521}",205200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},205200,cda-server-3,-49.0508869398342
+7ffa6ff4607a442eb508661143530d5b,42626.244643211365,50.0,False,-70.72055208052299,172,4128,1756437044,-53.580794914051395,1566858,1200,2025-08-29_05-10-44,{},297.3504238128662,24,172,42626.244643211365,"{'num_steps_sampled': 206400, 'num_steps_trained': 206400, 'default': {'policy_loss': -0.12440269440412521, 'vf_explained_var': 0.9441279172897339, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 7.573556900024414, 'entropy': 14.359490394592285, 'kl': 0.016533873975276947, 'total_loss': 7.46589469909668}, 'sample_time_ms': 253592.02, 'grad_time_ms': 695.983, 'load_time_ms': 1.393, 'update_time_ms': 2.517}",206400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},206400,cda-server-3,-49.0508869398342
+7ffa6ff4607a442eb508661143530d5b,42860.58568787575,50.0,False,-70.72055208052299,173,4152,1756437278,-53.7161237568239,1566858,1200,2025-08-29_05-14-38,{},234.34104466438293,24,173,42860.58568787575,"{'num_steps_sampled': 207600, 'num_steps_trained': 207600, 'default': {'policy_loss': -0.12295085936784744, 'vf_explained_var': 0.9145262837409973, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 12.39100170135498, 'entropy': 14.055234909057617, 'kl': 0.016166819259524345, 'total_loss': 12.284420013427734}, 'sample_time_ms': 250662.754, 'grad_time_ms': 696.223, 'load_time_ms': 1.405, 'update_time_ms': 2.533}",207600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},207600,cda-server-3,-49.0508869398342
+7ffa6ff4607a442eb508661143530d5b,43113.22520804405,50.0,False,-86.90779398729012,174,4176,1756437531,-53.890788490715124,1566858,1200,2025-08-29_05-18-51,{},252.63952016830444,24,174,43113.22520804405,"{'num_steps_sampled': 208800, 'num_steps_trained': 208800, 'default': {'policy_loss': -0.13327403366565704, 'vf_explained_var': 0.9170873165130615, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 14.936214447021484, 'entropy': 14.28572940826416, 'kl': 0.014670169912278652, 'total_loss': 14.817794799804688}, 'sample_time_ms': 250254.236, 'grad_time_ms': 697.167, 'load_time_ms': 1.331, 'update_time_ms': 2.562}",208800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},208800,cda-server-3,-50.13486725085076
+7ffa6ff4607a442eb508661143530d5b,43354.569568157196,50.0,False,-86.90779398729012,175,4200,1756437772,-53.70484142252989,1566858,1200,2025-08-29_05-22-52,{},241.34436011314392,24,175,43354.569568157196,"{'num_steps_sampled': 210000, 'num_steps_trained': 210000, 'default': {'policy_loss': -0.1241101399064064, 'vf_explained_var': 0.928949773311615, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 10.733738899230957, 'entropy': 14.182540893554688, 'kl': 0.01592331938445568, 'total_loss': 10.625751495361328}, 'sample_time_ms': 249532.855, 'grad_time_ms': 697.821, 'load_time_ms': 1.38, 'update_time_ms': 2.569}",210000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},210000,cda-server-3,-50.13486725085076
+7ffa6ff4607a442eb508661143530d5b,43621.62365627289,50.0,False,-86.90779398729012,176,4224,1756438039,-53.60701516354529,1566858,1200,2025-08-29_05-27-19,{},267.05408811569214,24,176,43621.62365627289,"{'num_steps_sampled': 211200, 'num_steps_trained': 211200, 'default': {'policy_loss': -0.12065468728542328, 'vf_explained_var': 0.9261561632156372, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 10.087909698486328, 'entropy': 13.944937705993652, 'kl': 0.015050739049911499, 'total_loss': 9.98249340057373}, 'sample_time_ms': 249000.493, 'grad_time_ms': 698.123, 'load_time_ms': 1.361, 'update_time_ms': 2.578}",211200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},211200,cda-server-3,-50.13486725085076
+7ffa6ff4607a442eb508661143530d5b,43901.720831632614,50.0,False,-86.90779398729012,177,4248,1756438319,-53.543342405927405,1566858,1200,2025-08-29_05-31-59,{},280.09717535972595,24,177,43901.720831632614,"{'num_steps_sampled': 212400, 'num_steps_trained': 212400, 'default': {'policy_loss': -0.1298007220029831, 'vf_explained_var': 0.9530531764030457, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 6.61334228515625, 'entropy': 14.069295883178711, 'kl': 0.01683618873357773, 'total_loss': 6.500588417053223}, 'sample_time_ms': 254205.562, 'grad_time_ms': 696.216, 'load_time_ms': 1.348, 'update_time_ms': 2.603}",212400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},212400,cda-server-3,-50.69632375703871
+7ffa6ff4607a442eb508661143530d5b,44153.83974337578,50.0,False,-66.96402946455778,178,4272,1756438572,-53.031808792535166,1566858,1200,2025-08-29_05-36-12,{},252.11891174316406,24,178,44153.83974337578,"{'num_steps_sampled': 213600, 'num_steps_trained': 213600, 'default': {'policy_loss': -0.130199134349823, 'vf_explained_var': 0.9504425525665283, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 6.463962078094482, 'entropy': 13.97944164276123, 'kl': 0.01592904143035412, 'total_loss': 6.349891185760498}, 'sample_time_ms': 255259.466, 'grad_time_ms': 695.531, 'load_time_ms': 1.36, 'update_time_ms': 2.621}",213600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},213600,cda-server-3,-49.36445515990393
+7ffa6ff4607a442eb508661143530d5b,44386.934242248535,50.0,False,-66.96402946455778,179,4296,1756438805,-53.042538560292826,1566858,1200,2025-08-29_05-40-05,{},233.09449887275696,24,179,44386.934242248535,"{'num_steps_sampled': 214800, 'num_steps_trained': 214800, 'default': {'policy_loss': -0.13994605839252472, 'vf_explained_var': 0.9725171327590942, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 3.6190366744995117, 'entropy': 13.99028205871582, 'kl': 0.01614346355199814, 'total_loss': 3.495435953140259}, 'sample_time_ms': 254121.206, 'grad_time_ms': 695.892, 'load_time_ms': 1.397, 'update_time_ms': 2.636}",214800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},214800,cda-server-3,-49.36445515990393
+7ffa6ff4607a442eb508661143530d5b,44641.26664805412,50.0,False,-66.96402946455778,180,4320,1756439059,-53.1373632716962,1566858,1200,2025-08-29_05-44-19,{},254.33240580558777,24,180,44641.26664805412,"{'num_steps_sampled': 216000, 'num_steps_trained': 216000, 'default': {'policy_loss': -0.1175423189997673, 'vf_explained_var': 0.9335753917694092, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 9.291361808776855, 'entropy': 13.877095222473145, 'kl': 0.015891285613179207, 'total_loss': 9.189908981323242}, 'sample_time_ms': 254453.815, 'grad_time_ms': 695.543, 'load_time_ms': 1.494, 'update_time_ms': 2.601}",216000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},216000,cda-server-3,-49.36445515990393
+7ffa6ff4607a442eb508661143530d5b,44861.4182267189,50.0,False,-94.56750234999927,181,4344,1756439279,-53.63649838877152,1566858,1200,2025-08-29_05-47-59,{},220.15157866477966,24,181,44861.4182267189,"{'num_steps_sampled': 217200, 'num_steps_trained': 217200, 'default': {'policy_loss': -0.11929008364677429, 'vf_explained_var': 0.9169760942459106, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 14.88999080657959, 'entropy': 13.808137893676758, 'kl': 0.013675041496753693, 'total_loss': 14.7845458984375}, 'sample_time_ms': 252547.651, 'grad_time_ms': 695.443, 'load_time_ms': 1.57, 'update_time_ms': 2.619}",217200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},217200,cda-server-3,-49.36445515990393
+7ffa6ff4607a442eb508661143530d5b,45094.17157244682,50.0,False,-94.56750234999927,182,4368,1756439512,-53.8451041786324,1566858,1200,2025-08-29_05-51-52,{},232.75334572792053,24,182,45094.17157244682,"{'num_steps_sampled': 218400, 'num_steps_trained': 218400, 'default': {'policy_loss': -0.1277659684419632, 'vf_explained_var': 0.9497382044792175, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 7.0899529457092285, 'entropy': 13.709293365478516, 'kl': 0.015457798726856709, 'total_loss': 6.977838516235352}, 'sample_time_ms': 246088.625, 'grad_time_ms': 694.91, 'load_time_ms': 1.472, 'update_time_ms': 2.614}",218400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},218400,cda-server-3,-49.08233276373182
+7ffa6ff4607a442eb508661143530d5b,45367.272315979004,50.0,False,-94.56750234999927,183,4392,1756439785,-54.12122982188653,1566858,1200,2025-08-29_05-56-25,{},273.1007435321808,24,183,45367.272315979004,"{'num_steps_sampled': 219600, 'num_steps_trained': 219600, 'default': {'policy_loss': -0.11586789041757584, 'vf_explained_var': 0.8821346759796143, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 20.151500701904297, 'entropy': 13.90664291381836, 'kl': 0.012427722103893757, 'total_loss': 20.048213958740234}, 'sample_time_ms': 249965.472, 'grad_time_ms': 694.231, 'load_time_ms': 1.356, 'update_time_ms': 2.572}",219600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},219600,cda-server-3,-49.08233276373182
+7ffa6ff4607a442eb508661143530d5b,45608.537001371384,50.0,False,-94.56750234999927,184,4416,1756440026,-54.261823213783686,1566858,1200,2025-08-29_06-00-26,{},241.26468539237976,24,184,45608.537001371384,"{'num_steps_sampled': 220800, 'num_steps_trained': 220800, 'default': {'policy_loss': -0.1134781688451767, 'vf_explained_var': 0.9582895636558533, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 6.243759632110596, 'entropy': 13.678974151611328, 'kl': 0.013173202984035015, 'total_loss': 6.143619060516357}, 'sample_time_ms': 248827.732, 'grad_time_ms': 694.495, 'load_time_ms': 1.368, 'update_time_ms': 2.567}",220800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},220800,cda-server-3,-49.08233276373182
+7ffa6ff4607a442eb508661143530d5b,45841.35560679436,50.0,False,-80.45771722108525,185,4440,1756440259,-53.90100144491721,1566858,1200,2025-08-29_06-04-19,{},232.81860542297363,24,185,45841.35560679436,"{'num_steps_sampled': 222000, 'num_steps_trained': 222000, 'default': {'policy_loss': -0.12608960270881653, 'vf_explained_var': 0.9605620503425598, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 5.352666854858398, 'entropy': 13.728774070739746, 'kl': 0.016028843820095062, 'total_loss': 5.242806911468506}, 'sample_time_ms': 247974.951, 'grad_time_ms': 694.704, 'load_time_ms': 1.354, 'update_time_ms': 2.577}",222000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},222000,cda-server-3,-49.08233276373182
+7ffa6ff4607a442eb508661143530d5b,46075.51358270645,50.0,False,-103.01053707639123,186,4464,1756440493,-54.77867174185004,1566858,1200,2025-08-29_06-08-13,{},234.15797591209412,24,186,46075.51358270645,"{'num_steps_sampled': 223200, 'num_steps_trained': 223200, 'default': {'policy_loss': -0.1334741711616516, 'vf_explained_var': 0.8342825174331665, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 47.81349563598633, 'entropy': 13.694595336914062, 'kl': 0.012813089415431023, 'total_loss': 47.69300079345703}, 'sample_time_ms': 244685.472, 'grad_time_ms': 694.641, 'load_time_ms': 1.328, 'update_time_ms': 2.575}",223200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},223200,cda-server-3,-49.99546774844703
+7ffa6ff4607a442eb508661143530d5b,46318.27295923233,50.0,False,-103.01053707639123,187,4488,1756440736,-54.5534802284662,1566858,1200,2025-08-29_06-12-16,{},242.7593765258789,24,187,46318.27295923233,"{'num_steps_sampled': 224400, 'num_steps_trained': 224400, 'default': {'policy_loss': -0.12978488206863403, 'vf_explained_var': 0.9535620212554932, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 6.927732467651367, 'entropy': 13.717631340026855, 'kl': 0.016308149322867393, 'total_loss': 6.814460277557373}, 'sample_time_ms': 240951.392, 'grad_time_ms': 694.812, 'load_time_ms': 1.384, 'update_time_ms': 2.555}",224400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},224400,cda-server-3,-49.99546774844703
+7ffa6ff4607a442eb508661143530d5b,46580.00093770027,50.0,False,-103.01053707639123,188,4512,1756440998,-54.33199623642102,1566858,1200,2025-08-29_06-16-38,{},261.7279784679413,24,188,46580.00093770027,"{'num_steps_sampled': 225600, 'num_steps_trained': 225600, 'default': {'policy_loss': -0.14069527387619019, 'vf_explained_var': 0.9462458491325378, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 7.247664451599121, 'entropy': 13.783607482910156, 'kl': 0.017661113291978836, 'total_loss': 7.124850749969482}, 'sample_time_ms': 241911.574, 'grad_time_ms': 695.705, 'load_time_ms': 1.292, 'update_time_ms': 2.557}",225600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},225600,cda-server-3,-50.05406011084624
+7ffa6ff4607a442eb508661143530d5b,46799.67392349243,50.0,False,-103.01053707639123,189,4536,1756441217,-54.52519962590397,1566858,1200,2025-08-29_06-20-17,{},219.67298579216003,24,189,46799.67392349243,"{'num_steps_sampled': 226800, 'num_steps_trained': 226800, 'default': {'policy_loss': -0.1274680346250534, 'vf_explained_var': 0.9378973841667175, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 9.567581176757812, 'entropy': 13.739153861999512, 'kl': 0.016897717490792274, 'total_loss': 9.457221984863281}, 'sample_time_ms': 240568.3, 'grad_time_ms': 696.809, 'load_time_ms': 1.279, 'update_time_ms': 2.566}",226800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},226800,cda-server-3,-50.05406011084624
+7ffa6ff4607a442eb508661143530d5b,47040.63526558876,50.0,False,-103.01053707639123,190,4560,1756441458,-53.97254179020705,1566858,1200,2025-08-29_06-24-18,{},240.96134209632874,24,190,47040.63526558876,"{'num_steps_sampled': 228000, 'num_steps_trained': 228000, 'default': {'policy_loss': -0.12781214714050293, 'vf_explained_var': 0.9580786824226379, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 6.02968168258667, 'entropy': 13.766950607299805, 'kl': 0.017274074256420135, 'total_loss': 5.9193596839904785}, 'sample_time_ms': 239230.328, 'grad_time_ms': 697.689, 'load_time_ms': 1.28, 'update_time_ms': 2.571}",228000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},228000,cda-server-3,-50.59615050914242
+7ffa6ff4607a442eb508661143530d5b,47280.122878313065,50.0,False,-101.10936583155627,191,4584,1756441698,-54.279260741314474,1566858,1200,2025-08-29_06-28-18,{},239.4876127243042,24,191,47280.122878313065,"{'num_steps_sampled': 229200, 'num_steps_trained': 229200, 'default': {'policy_loss': -0.11484278738498688, 'vf_explained_var': 0.8638635277748108, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 30.318471908569336, 'entropy': 13.744145393371582, 'kl': 0.013898147270083427, 'total_loss': 30.21769905090332}, 'sample_time_ms': 241163.985, 'grad_time_ms': 697.753, 'load_time_ms': 1.198, 'update_time_ms': 2.583}",229200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},229200,cda-server-3,-50.59615050914242
+7ffa6ff4607a442eb508661143530d5b,47572.10169816017,50.0,False,-101.10936583155627,192,4608,1756441990,-54.16387812212497,1566858,1200,2025-08-29_06-33-10,{},291.97881984710693,24,192,47572.10169816017,"{'num_steps_sampled': 230400, 'num_steps_trained': 230400, 'default': {'policy_loss': -0.12756960093975067, 'vf_explained_var': 0.9341971278190613, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 9.284131050109863, 'entropy': 13.830009460449219, 'kl': 0.014379401691257954, 'total_loss': 9.171121597290039}, 'sample_time_ms': 247086.161, 'grad_time_ms': 697.944, 'load_time_ms': 1.293, 'update_time_ms': 2.578}",230400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},230400,cda-server-3,-50.59615050914242
+7ffa6ff4607a442eb508661143530d5b,47812.417081832886,50.0,False,-101.10936583155627,193,4632,1756442230,-53.96007896743721,1566858,1200,2025-08-29_06-37-10,{},240.31538367271423,24,193,47812.417081832886,"{'num_steps_sampled': 231600, 'num_steps_trained': 231600, 'default': {'policy_loss': -0.12245944887399673, 'vf_explained_var': 0.9257941842079163, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 10.380656242370605, 'entropy': 13.640023231506348, 'kl': 0.015688113868236542, 'total_loss': 10.274081230163574}, 'sample_time_ms': 243806.656, 'grad_time_ms': 698.762, 'load_time_ms': 1.389, 'update_time_ms': 2.619}",231600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},231600,cda-server-3,-50.59615050914242
+7ffa6ff4607a442eb508661143530d5b,48095.95903515816,50.0,False,-101.10936583155627,194,4656,1756442514,-54.306236617855,1566858,1200,2025-08-29_06-41-54,{},283.5419533252716,24,194,48095.95903515816,"{'num_steps_sampled': 232800, 'num_steps_trained': 232800, 'default': {'policy_loss': -0.1166752278804779, 'vf_explained_var': 0.8864515423774719, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 21.87663459777832, 'entropy': 13.669998168945312, 'kl': 0.013523032888770103, 'total_loss': 21.773651123046875}, 'sample_time_ms': 248034.822, 'grad_time_ms': 698.258, 'load_time_ms': 1.439, 'update_time_ms': 2.586}",232800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},232800,cda-server-3,-50.90256704987865
+7ffa6ff4607a442eb508661143530d5b,48327.729848623276,50.0,False,-89.07687021099098,195,4680,1756442746,-53.548609026782785,1566858,1200,2025-08-29_06-45-46,{},231.7708134651184,24,195,48327.729848623276,"{'num_steps_sampled': 234000, 'num_steps_trained': 234000, 'default': {'policy_loss': -0.10917246341705322, 'vf_explained_var': 0.9413497447967529, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 9.061535835266113, 'entropy': 13.525612831115723, 'kl': 0.013985957019031048, 'total_loss': 8.96652603149414}, 'sample_time_ms': 247929.484, 'grad_time_ms': 698.848, 'load_time_ms': 1.42, 'update_time_ms': 2.59}",234000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},234000,cda-server-3,-51.49734124044208
+7ffa6ff4607a442eb508661143530d5b,48549.923015117645,50.0,False,-89.07687021099098,196,4704,1756442968,-53.54348130786682,1566858,1200,2025-08-29_06-49-28,{},222.1931664943695,24,196,48549.923015117645,"{'num_steps_sampled': 235200, 'num_steps_trained': 235200, 'default': {'policy_loss': -0.1291184425354004, 'vf_explained_var': 0.949661374092102, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 6.641875267028809, 'entropy': 13.663838386535645, 'kl': 0.015983549878001213, 'total_loss': 6.528940200805664}, 'sample_time_ms': 246731.703, 'grad_time_ms': 700.173, 'load_time_ms': 1.401, 'update_time_ms': 2.595}",235200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},235200,cda-server-3,-50.9655152146521
+7ffa6ff4607a442eb508661143530d5b,48802.114077329636,50.0,False,-89.07687021099098,197,4728,1756443220,-53.34710076680881,1566858,1200,2025-08-29_06-53-40,{},252.19106221199036,24,197,48802.114077329636,"{'num_steps_sampled': 236400, 'num_steps_trained': 236400, 'default': {'policy_loss': -0.12881432473659515, 'vf_explained_var': 0.9463976621627808, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 7.419828414916992, 'entropy': 13.690502166748047, 'kl': 0.015713712200522423, 'total_loss': 7.306924819946289}, 'sample_time_ms': 247675.32, 'grad_time_ms': 699.706, 'load_time_ms': 1.417, 'update_time_ms': 2.593}",236400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},236400,cda-server-3,-49.31600089328854
+7ffa6ff4607a442eb508661143530d5b,49033.07736849785,50.0,False,-62.32068669276827,198,4752,1756443451,-53.089172422911425,1566858,1200,2025-08-29_06-57-31,{},230.9632911682129,24,198,49033.07736849785,"{'num_steps_sampled': 237600, 'num_steps_trained': 237600, 'default': {'policy_loss': -0.1262063831090927, 'vf_explained_var': 0.9186666011810303, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 11.172323226928711, 'entropy': 13.764321327209473, 'kl': 0.014617557637393475, 'total_loss': 11.060917854309082}, 'sample_time_ms': 244599.413, 'grad_time_ms': 698.993, 'load_time_ms': 1.506, 'update_time_ms': 2.574}",237600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},237600,cda-server-3,-49.31600089328854
+7ffa6ff4607a442eb508661143530d5b,49260.586948394775,50.0,False,-62.32068669276827,199,4776,1756443678,-52.896120268548586,1566858,1200,2025-08-29_07-01-18,{},227.50957989692688,24,199,49260.586948394775,"{'num_steps_sampled': 238800, 'num_steps_trained': 238800, 'default': {'policy_loss': -0.131291925907135, 'vf_explained_var': 0.9453469514846802, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 7.727341651916504, 'entropy': 13.57127571105957, 'kl': 0.016682572662830353, 'total_loss': 7.612940311431885}, 'sample_time_ms': 245384.726, 'grad_time_ms': 697.371, 'load_time_ms': 1.513, 'update_time_ms': 2.578}",238800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},238800,cda-server-3,-49.2260156024492
+7ffa6ff4607a442eb508661143530d5b,49541.825184345245,50.0,False,-63.96882214668029,200,4800,1756443960,-53.008628398442994,1566858,1200,2025-08-29_07-06-00,{},281.23823595046997,24,200,49541.825184345245,"{'num_steps_sampled': 240000, 'num_steps_trained': 240000, 'default': {'policy_loss': -0.12226442247629166, 'vf_explained_var': 0.9560834169387817, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 6.315108776092529, 'entropy': 13.709651947021484, 'kl': 0.014527440071105957, 'total_loss': 6.207553386688232}, 'sample_time_ms': 249413.369, 'grad_time_ms': 696.563, 'load_time_ms': 1.415, 'update_time_ms': 2.597}",240000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},240000,cda-server-3,-49.2260156024492
+7ffa6ff4607a442eb508661143530d5b,49783.74181032181,50.0,False,-63.96882214668029,201,4824,1756444202,-52.91975331889113,1566858,1200,2025-08-29_07-10-02,{},241.9166259765625,24,201,49783.74181032181,"{'num_steps_sampled': 241200, 'num_steps_trained': 241200, 'default': {'policy_loss': -0.13792450726032257, 'vf_explained_var': 0.9654067158699036, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 4.6045002937316895, 'entropy': 13.688363075256348, 'kl': 0.015577022917568684, 'total_loss': 4.4823479652404785}, 'sample_time_ms': 249655.821, 'grad_time_ms': 696.887, 'load_time_ms': 1.501, 'update_time_ms': 2.586}",241200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},241200,cda-server-3,-49.2260156024492
+7ffa6ff4607a442eb508661143530d5b,50030.31158399582,50.0,False,-105.25511476379766,202,4848,1756444448,-53.19758526213469,1566858,1200,2025-08-29_07-14-08,{},246.56977367401123,24,202,50030.31158399582,"{'num_steps_sampled': 242400, 'num_steps_trained': 242400, 'default': {'policy_loss': -0.0947578102350235, 'vf_explained_var': 0.8161755204200745, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 43.95417785644531, 'entropy': 13.589754104614258, 'kl': 0.009557071141898632, 'total_loss': 43.86909866333008}, 'sample_time_ms': 245114.838, 'grad_time_ms': 696.965, 'load_time_ms': 1.498, 'update_time_ms': 2.586}",242400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},242400,cda-server-3,-48.98603498873693
+7ffa6ff4607a442eb508661143530d5b,50312.89493370056,50.0,False,-105.25511476379766,203,4872,1756444731,-53.27160994877568,1566858,1200,2025-08-29_07-18-51,{},282.58334970474243,24,203,50312.89493370056,"{'num_steps_sampled': 243600, 'num_steps_trained': 243600, 'default': {'policy_loss': -0.12207228690385818, 'vf_explained_var': 0.9586093425750732, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 5.939465045928955, 'entropy': 13.629680633544922, 'kl': 0.016084210947155952, 'total_loss': 5.833678245544434}, 'sample_time_ms': 249342.276, 'grad_time_ms': 696.344, 'load_time_ms': 1.497, 'update_time_ms': 2.542}",243600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},243600,cda-server-3,-48.98603498873693
+7ffa6ff4607a442eb508661143530d5b,50540.21925139427,50.0,False,-105.25511476379766,204,4896,1756444958,-53.370897240358936,1566858,1200,2025-08-29_07-22-38,{},227.32431769371033,24,204,50540.21925139427,"{'num_steps_sampled': 244800, 'num_steps_trained': 244800, 'default': {'policy_loss': -0.134691059589386, 'vf_explained_var': 0.9489078521728516, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 6.949552536010742, 'entropy': 13.448970794677734, 'kl': 0.017023924738168716, 'total_loss': 6.832098007202148}, 'sample_time_ms': 243720.45, 'grad_time_ms': 696.352, 'load_time_ms': 1.522, 'update_time_ms': 2.551}",244800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},244800,cda-server-3,-48.98603498873693
+7ffa6ff4607a442eb508661143530d5b,50779.589007377625,50.0,False,-105.25511476379766,205,4920,1756445197,-53.458245488786794,1566858,1200,2025-08-29_07-26-37,{},239.36975598335266,24,205,50779.589007377625,"{'num_steps_sampled': 246000, 'num_steps_trained': 246000, 'default': {'policy_loss': -0.12922601401805878, 'vf_explained_var': 0.9597580432891846, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 5.400381088256836, 'entropy': 13.406126022338867, 'kl': 0.017056623473763466, 'total_loss': 5.288424968719482}, 'sample_time_ms': 244481.14, 'grad_time_ms': 695.513, 'load_time_ms': 1.551, 'update_time_ms': 2.528}",246000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},246000,cda-server-3,-48.98603498873693
+7ffa6ff4607a442eb508661143530d5b,51067.62697553635,50.0,False,-58.935624792842,206,4944,1756445486,-53.01138822250478,1566858,1200,2025-08-29_07-31-26,{},288.0379681587219,24,206,51067.62697553635,"{'num_steps_sampled': 247200, 'num_steps_trained': 247200, 'default': {'policy_loss': -0.1171593964099884, 'vf_explained_var': 0.9476562142372131, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 6.902299880981445, 'entropy': 13.519577026367188, 'kl': 0.01573404110968113, 'total_loss': 6.801071643829346}, 'sample_time_ms': 251066.806, 'grad_time_ms': 694.24, 'load_time_ms': 1.59, 'update_time_ms': 2.545}",247200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},247200,cda-server-3,-51.01486236176433
+7ffa6ff4607a442eb508661143530d5b,51325.64721798897,50.0,False,-59.954047230685426,207,4968,1756445744,-53.17399045538728,1566858,1200,2025-08-29_07-35-44,{},258.02024245262146,24,207,51325.64721798897,"{'num_steps_sampled': 248400, 'num_steps_trained': 248400, 'default': {'policy_loss': -0.12967216968536377, 'vf_explained_var': 0.9467138648033142, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 7.576404094696045, 'entropy': 13.4369478225708, 'kl': 0.01681762933731079, 'total_loss': 7.4637603759765625}, 'sample_time_ms': 251649.088, 'grad_time_ms': 694.83, 'load_time_ms': 1.601, 'update_time_ms': 2.567}",248400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},248400,cda-server-3,-51.01486236176433
+7ffa6ff4607a442eb508661143530d5b,51613.95212769508,50.0,False,-61.44569830893842,208,4992,1756446032,-53.11367069586581,1566858,1200,2025-08-29_07-40-32,{},288.3049097061157,24,208,51613.95212769508,"{'num_steps_sampled': 249600, 'num_steps_trained': 249600, 'default': {'policy_loss': -0.12367913126945496, 'vf_explained_var': 0.9593546986579895, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 5.667877674102783, 'entropy': 13.433245658874512, 'kl': 0.015404744073748589, 'total_loss': 5.55979585647583}, 'sample_time_ms': 257382.956, 'grad_time_ms': 695.117, 'load_time_ms': 1.613, 'update_time_ms': 2.55}",249600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},249600,cda-server-3,-50.029668242570246
+7ffa6ff4607a442eb508661143530d5b,51856.40980172157,50.0,False,-71.96171297636684,209,5016,1756446274,-53.29923892735492,1566858,1200,2025-08-29_07-44-34,{},242.45767402648926,24,209,51856.40980172157,"{'num_steps_sampled': 250800, 'num_steps_trained': 250800, 'default': {'policy_loss': -0.13099414110183716, 'vf_explained_var': 0.9026677012443542, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 15.419004440307617, 'entropy': 13.460000991821289, 'kl': 0.015934377908706665, 'total_loss': 15.304142951965332}, 'sample_time_ms': 258877.945, 'grad_time_ms': 695.007, 'load_time_ms': 1.564, 'update_time_ms': 2.538}",250800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},250800,cda-server-3,-48.49890370956543
+7ffa6ff4607a442eb508661143530d5b,52074.34491252899,50.0,False,-76.06146106644461,210,5040,1756446492,-53.62399428423448,1566858,1200,2025-08-29_07-48-12,{},217.93511080741882,24,210,52074.34491252899,"{'num_steps_sampled': 252000, 'num_steps_trained': 252000, 'default': {'policy_loss': -0.12491732090711594, 'vf_explained_var': 0.8966451287269592, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 20.819692611694336, 'entropy': 13.459056854248047, 'kl': 0.014209000393748283, 'total_loss': 20.70915985107422}, 'sample_time_ms': 252547.281, 'grad_time_ms': 695.311, 'load_time_ms': 1.597, 'update_time_ms': 2.512}",252000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},252000,cda-server-3,-48.49890370956543
+7ffa6ff4607a442eb508661143530d5b,52310.4198474884,50.0,False,-88.33545886911972,211,5064,1756446728,-53.6863478223295,1566858,1200,2025-08-29_07-52-08,{},236.07493495941162,24,211,52310.4198474884,"{'num_steps_sampled': 253200, 'num_steps_trained': 253200, 'default': {'policy_loss': -0.1190461590886116, 'vf_explained_var': 0.9395532608032227, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 11.596328735351562, 'entropy': 13.170246124267578, 'kl': 0.013326210901141167, 'total_loss': 11.490775108337402}, 'sample_time_ms': 251962.736, 'grad_time_ms': 695.664, 'load_time_ms': 1.599, 'update_time_ms': 2.514}",253200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},253200,cda-server-3,-48.40327379293791
+7ffa6ff4607a442eb508661143530d5b,52538.71108055115,50.0,False,-88.33545886911972,212,5088,1756446957,-53.62617516991392,1566858,1200,2025-08-29_07-55-57,{},228.29123306274414,24,212,52538.71108055115,"{'num_steps_sampled': 254400, 'num_steps_trained': 254400, 'default': {'policy_loss': -0.1376982182264328, 'vf_explained_var': 0.9368199110031128, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 9.25528621673584, 'entropy': 13.441559791564941, 'kl': 0.01579122245311737, 'total_loss': 9.133577346801758}, 'sample_time_ms': 250135.485, 'grad_time_ms': 695.088, 'load_time_ms': 1.609, 'update_time_ms': 2.514}",254400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},254400,cda-server-3,-48.388893830147204
+7ffa6ff4607a442eb508661143530d5b,52778.71068429947,50.0,False,-88.33545886911972,213,5112,1756447197,-54.284821358814376,1566858,1200,2025-08-29_07-59-57,{},239.99960374832153,24,213,52778.71068429947,"{'num_steps_sampled': 255600, 'num_steps_trained': 255600, 'default': {'policy_loss': -0.13345371186733246, 'vf_explained_var': 0.9282054901123047, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 12.236493110656738, 'entropy': 13.436868667602539, 'kl': 0.01283181644976139, 'total_loss': 12.116031646728516}, 'sample_time_ms': 245877.083, 'grad_time_ms': 695.115, 'load_time_ms': 1.61, 'update_time_ms': 2.515}",255600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},255600,cda-server-3,-48.388893830147204
+7ffa6ff4607a442eb508661143530d5b,53024.146672964096,50.0,False,-88.33545886911972,214,5136,1756447442,-53.82445902246092,1566858,1200,2025-08-29_08-04-02,{},245.43598866462708,24,214,53024.146672964096,"{'num_steps_sampled': 256800, 'num_steps_trained': 256800, 'default': {'policy_loss': -0.12373081594705582, 'vf_explained_var': 0.9054085612297058, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 16.972644805908203, 'entropy': 13.323928833007812, 'kl': 0.013606571592390537, 'total_loss': 16.86269187927246}, 'sample_time_ms': 247686.897, 'grad_time_ms': 696.479, 'load_time_ms': 1.586, 'update_time_ms': 2.517}",256800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},256800,cda-server-3,-48.388893830147204
+7ffa6ff4607a442eb508661143530d5b,53255.89246845245,50.0,False,-81.13652323493616,215,5160,1756447674,-53.73372533272343,1566858,1200,2025-08-29_08-07-54,{},231.74579548835754,24,215,53255.89246845245,"{'num_steps_sampled': 258000, 'num_steps_trained': 258000, 'default': {'policy_loss': -0.11627980321645737, 'vf_explained_var': 0.9099230170249939, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 14.198071479797363, 'entropy': 13.262800216674805, 'kl': 0.01339254342019558, 'total_loss': 14.095352172851562}, 'sample_time_ms': 246925.159, 'grad_time_ms': 695.831, 'load_time_ms': 1.562, 'update_time_ms': 2.554}",258000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},258000,cda-server-3,-48.388893830147204
+7ffa6ff4607a442eb508661143530d5b,53515.743619441986,50.0,False,-92.46390703641067,216,5184,1756447934,-53.932576924530615,1566858,1200,2025-08-29_08-12-14,{},259.85115098953247,24,216,53515.743619441986,"{'num_steps_sampled': 259200, 'num_steps_trained': 259200, 'default': {'policy_loss': -0.12003253400325775, 'vf_explained_var': 0.9072751998901367, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 15.475444793701172, 'entropy': 13.39101505279541, 'kl': 0.013980243355035782, 'total_loss': 15.36956787109375}, 'sample_time_ms': 244104.479, 'grad_time_ms': 697.843, 'load_time_ms': 1.577, 'update_time_ms': 2.535}",259200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},259200,cda-server-3,-47.03767859697603
+7ffa6ff4607a442eb508661143530d5b,53759.407838344574,50.0,False,-92.46390703641067,217,5208,1756448177,-53.47457909992057,1566858,1200,2025-08-29_08-16-17,{},243.6642189025879,24,217,53759.407838344574,"{'num_steps_sampled': 260400, 'num_steps_trained': 260400, 'default': {'policy_loss': -0.13741131126880646, 'vf_explained_var': 0.9654526114463806, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 4.5877685546875, 'entropy': 13.240228652954102, 'kl': 0.01664682850241661, 'total_loss': 4.467211723327637}, 'sample_time_ms': 242669.242, 'grad_time_ms': 697.574, 'load_time_ms': 1.573, 'update_time_ms': 2.499}",260400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},260400,cda-server-3,-47.03767859697603
+7ffa6ff4607a442eb508661143530d5b,53989.52684402466,50.0,False,-92.46390703641067,218,5232,1756448408,-53.09185593325324,1566858,1200,2025-08-29_08-20-08,{},230.11900568008423,24,218,53989.52684402466,"{'num_steps_sampled': 261600, 'num_steps_trained': 261600, 'default': {'policy_loss': -0.1212388426065445, 'vf_explained_var': 0.9537698030471802, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 6.838181495666504, 'entropy': 13.423412322998047, 'kl': 0.017338156700134277, 'total_loss': 6.7344970703125}, 'sample_time_ms': 236850.174, 'grad_time_ms': 698.088, 'load_time_ms': 1.564, 'update_time_ms': 2.498}",261600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},261600,cda-server-3,-47.03767859697603
+7ffa6ff4607a442eb508661143530d5b,54236.05536913872,50.0,False,-92.46390703641067,219,5256,1756448654,-53.0488133532636,1566858,1200,2025-08-29_08-24-14,{},246.52852511405945,24,219,54236.05536913872,"{'num_steps_sampled': 262800, 'num_steps_trained': 262800, 'default': {'policy_loss': -0.12424381822347641, 'vf_explained_var': 0.9722467064857483, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 3.777590751647949, 'entropy': 13.256404876708984, 'kl': 0.016340035945177078, 'total_loss': 3.669891357421875}, 'sample_time_ms': 237256.053, 'grad_time_ms': 699.231, 'load_time_ms': 1.612, 'update_time_ms': 2.504}",262800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},262800,cda-server-3,-47.03767859697603
+7ffa6ff4607a442eb508661143530d5b,54476.720437288284,50.0,False,-59.73112114747605,220,5280,1756448895,-52.58953990415711,1566858,1200,2025-08-29_08-28-15,{},240.66506814956665,24,220,54476.720437288284,"{'num_steps_sampled': 264000, 'num_steps_trained': 264000, 'default': {'policy_loss': -0.1257346272468567, 'vf_explained_var': 0.9700483679771423, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 3.909719705581665, 'entropy': 13.458242416381836, 'kl': 0.01716863550245762, 'total_loss': 3.801368236541748}, 'sample_time_ms': 239528.368, 'grad_time_ms': 699.809, 'load_time_ms': 1.663, 'update_time_ms': 2.522}",264000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},264000,cda-server-3,-49.35778091512252
+7ffa6ff4607a442eb508661143530d5b,54716.46133208275,50.0,False,-59.73112114747605,221,5304,1756449135,-52.64133109806006,1566858,1200,2025-08-29_08-32-15,{},239.7408947944641,24,221,54716.46133208275,"{'num_steps_sampled': 265200, 'num_steps_trained': 265200, 'default': {'policy_loss': -0.12260796129703522, 'vf_explained_var': 0.9685428142547607, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 4.314360618591309, 'entropy': 13.286518096923828, 'kl': 0.015742920339107513, 'total_loss': 4.207692623138428}, 'sample_time_ms': 239897.071, 'grad_time_ms': 697.716, 'load_time_ms': 1.677, 'update_time_ms': 2.503}",265200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},265200,cda-server-3,-49.35778091512252
+7ffa6ff4607a442eb508661143530d5b,54962.24299144745,50.0,False,-59.73112114747605,222,5328,1756449380,-52.525968282336315,1566858,1200,2025-08-29_08-36-20,{},245.78165936470032,24,222,54962.24299144745,"{'num_steps_sampled': 266400, 'num_steps_trained': 266400, 'default': {'policy_loss': -0.12170767784118652, 'vf_explained_var': 0.9610524773597717, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 5.0196027755737305, 'entropy': 13.212718963623047, 'kl': 0.01548507995903492, 'total_loss': 4.913573741912842}, 'sample_time_ms': 241645.905, 'grad_time_ms': 697.901, 'load_time_ms': 1.665, 'update_time_ms': 2.55}",266400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},266400,cda-server-3,-49.00649469013475
+7ffa6ff4607a442eb508661143530d5b,55248.51720046997,50.0,False,-83.55056700243956,223,5352,1756449667,-52.698129910872005,1566858,1200,2025-08-29_08-41-07,{},286.274209022522,24,223,55248.51720046997,"{'num_steps_sampled': 267600, 'num_steps_trained': 267600, 'default': {'policy_loss': -0.12211109697818756, 'vf_explained_var': 0.9223343729972839, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 13.342047691345215, 'entropy': 13.361546516418457, 'kl': 0.012498829513788223, 'total_loss': 13.23259162902832}, 'sample_time_ms': 246272.877, 'grad_time_ms': 698.403, 'load_time_ms': 1.68, 'update_time_ms': 2.542}",267600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},267600,cda-server-3,-49.00649469013475
+7ffa6ff4607a442eb508661143530d5b,55457.6604681015,50.0,False,-83.55056700243956,224,5376,1756449876,-52.71015166295291,1566858,1200,2025-08-29_08-44-36,{},209.14326763153076,24,224,55457.6604681015,"{'num_steps_sampled': 268800, 'num_steps_trained': 268800, 'default': {'policy_loss': -0.13790854811668396, 'vf_explained_var': 0.9650555849075317, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 4.732025623321533, 'entropy': 13.232372283935547, 'kl': 0.01659400947391987, 'total_loss': 4.610918045043945}, 'sample_time_ms': 242643.923, 'grad_time_ms': 698.085, 'load_time_ms': 1.7, 'update_time_ms': 2.541}",268800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},268800,cda-server-3,-49.00649469013475
+7ffa6ff4607a442eb508661143530d5b,55714.48773908615,50.0,False,-83.55056700243956,225,5400,1756450133,-52.85868581510861,1566858,1200,2025-08-29_08-48-53,{},256.82727098464966,24,225,55714.48773908615,"{'num_steps_sampled': 270000, 'num_steps_trained': 270000, 'default': {'policy_loss': -0.13200251758098602, 'vf_explained_var': 0.9513610005378723, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 6.916146278381348, 'entropy': 13.240900993347168, 'kl': 0.016578860580921173, 'total_loss': 6.800930023193359}, 'sample_time_ms': 245150.936, 'grad_time_ms': 699.226, 'load_time_ms': 1.708, 'update_time_ms': 2.534}",270000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},270000,cda-server-3,-49.00649469013475
+7ffa6ff4607a442eb508661143530d5b,55974.45828509331,50.0,False,-83.55056700243956,226,5424,1756450393,-52.82738876249813,1566858,1200,2025-08-29_08-53-13,{},259.9705460071564,24,226,55974.45828509331,"{'num_steps_sampled': 271200, 'num_steps_trained': 271200, 'default': {'policy_loss': -0.12473750114440918, 'vf_explained_var': 0.9689039587974548, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 4.646268844604492, 'entropy': 13.287884712219238, 'kl': 0.01698196679353714, 'total_loss': 4.538724899291992}, 'sample_time_ms': 245165.588, 'grad_time_ms': 696.612, 'load_time_ms': 1.62, 'update_time_ms': 2.564}",271200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},271200,cda-server-3,-49.016792454608456
+7ffa6ff4607a442eb508661143530d5b,56223.66062140465,50.0,False,-79.4545443855248,227,5448,1756450642,-52.9500375272901,1566858,1200,2025-08-29_08-57-22,{},249.20233631134033,24,227,56223.66062140465,"{'num_steps_sampled': 272400, 'num_steps_trained': 272400, 'default': {'policy_loss': -0.10819558054208755, 'vf_explained_var': 0.8816754817962646, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 23.217864990234375, 'entropy': 13.26015853881836, 'kl': 0.012501864694058895, 'total_loss': 23.12232780456543}, 'sample_time_ms': 245719.133, 'grad_time_ms': 696.818, 'load_time_ms': 1.61, 'update_time_ms': 2.574}",272400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},272400,cda-server-3,-49.268852078605434
+7ffa6ff4607a442eb508661143530d5b,56507.24248743057,50.0,False,-79.4545443855248,228,5472,1756450925,-52.91810579853349,1566858,1200,2025-08-29_09-02-05,{},283.5818660259247,24,228,56507.24248743057,"{'num_steps_sampled': 273600, 'num_steps_trained': 273600, 'default': {'policy_loss': -0.10992512106895447, 'vf_explained_var': 0.944269597530365, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 7.72075080871582, 'entropy': 13.22465705871582, 'kl': 0.016507161781191826, 'total_loss': 7.627538204193115}, 'sample_time_ms': 251065.61, 'grad_time_ms': 696.614, 'load_time_ms': 1.599, 'update_time_ms': 2.572}",273600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},273600,cda-server-3,-49.98911850932992
+7ffa6ff4607a442eb508661143530d5b,56702.564005851746,50.0,False,-79.4545443855248,229,5496,1756451121,-52.85347165246375,1566858,1200,2025-08-29_09-05-21,{},195.3215184211731,24,229,56702.564005851746,"{'num_steps_sampled': 274800, 'num_steps_trained': 274800, 'default': {'policy_loss': -0.1320653110742569, 'vf_explained_var': 0.9556113481521606, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 6.415472984313965, 'entropy': 13.11414909362793, 'kl': 0.016584740951657295, 'total_loss': 6.300199508666992}, 'sample_time_ms': 245944.854, 'grad_time_ms': 696.672, 'load_time_ms': 1.598, 'update_time_ms': 2.527}",274800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},274800,cda-server-3,-49.98911850932992
+7ffa6ff4607a442eb508661143530d5b,56951.91757917404,50.0,False,-79.4545443855248,230,5520,1756451370,-52.92350903145639,1566858,1200,2025-08-29_09-09-30,{},249.35357332229614,24,230,56951.91757917404,"{'num_steps_sampled': 276000, 'num_steps_trained': 276000, 'default': {'policy_loss': -0.11662941426038742, 'vf_explained_var': 0.9661198854446411, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 4.510845184326172, 'entropy': 13.191387176513672, 'kl': 0.01542899664491415, 'total_loss': 4.40983772277832}, 'sample_time_ms': 246813.773, 'grad_time_ms': 696.606, 'load_time_ms': 1.602, 'update_time_ms': 2.518}",276000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},276000,cda-server-3,-50.02913413293667
+7ffa6ff4607a442eb508661143530d5b,57180.85185909271,50.0,False,-66.7089208892692,231,5544,1756451599,-52.592358692493825,1566858,1200,2025-08-29_09-13-19,{},228.93427991867065,24,231,57180.85185909271,"{'num_steps_sampled': 277200, 'num_steps_trained': 277200, 'default': {'policy_loss': -0.13687659800052643, 'vf_explained_var': 0.9492168426513672, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 6.889738082885742, 'entropy': 12.978316307067871, 'kl': 0.01719477027654648, 'total_loss': 6.770271301269531}, 'sample_time_ms': 245731.909, 'grad_time_ms': 697.838, 'load_time_ms': 1.582, 'update_time_ms': 2.513}",277200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},277200,cda-server-3,-49.17351010815454
+7ffa6ff4607a442eb508661143530d5b,57436.04451966286,50.0,False,-66.7089208892692,232,5568,1756451854,-52.62568365697358,1566858,1200,2025-08-29_09-17-34,{},255.19266057014465,24,232,57436.04451966286,"{'num_steps_sampled': 278400, 'num_steps_trained': 278400, 'default': {'policy_loss': -0.14635403454303741, 'vf_explained_var': 0.9654095768928528, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 4.514674186706543, 'entropy': 13.11203384399414, 'kl': 0.016351299360394478, 'total_loss': 4.384875774383545}, 'sample_time_ms': 246673.487, 'grad_time_ms': 697.352, 'load_time_ms': 1.581, 'update_time_ms': 2.472}",278400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},278400,cda-server-3,-49.17351010815454
+7ffa6ff4607a442eb508661143530d5b,57669.30855512619,50.0,False,-66.7089208892692,233,5592,1756452087,-52.547124175309,1566858,1200,2025-08-29_09-21-27,{},233.26403546333313,24,233,57669.30855512619,"{'num_steps_sampled': 279600, 'num_steps_trained': 279600, 'default': {'policy_loss': -0.14203177392482758, 'vf_explained_var': 0.965411901473999, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 4.680802822113037, 'entropy': 13.123396873474121, 'kl': 0.015472842380404472, 'total_loss': 4.554436683654785}, 'sample_time_ms': 241372.063, 'grad_time_ms': 697.741, 'load_time_ms': 1.555, 'update_time_ms': 2.509}",279600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},279600,cda-server-3,-49.17351010815454
+7ffa6ff4607a442eb508661143530d5b,57916.46813702583,50.0,False,-66.7089208892692,234,5616,1756452335,-52.47950947759737,1566858,1200,2025-08-29_09-25-35,{},247.15958189964294,24,234,57916.46813702583,"{'num_steps_sampled': 280800, 'num_steps_trained': 280800, 'default': {'policy_loss': -0.1272757351398468, 'vf_explained_var': 0.9736604690551758, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 3.4182353019714355, 'entropy': 13.120083808898926, 'kl': 0.01562454178929329, 'total_loss': 3.306779384613037}, 'sample_time_ms': 245174.253, 'grad_time_ms': 697.181, 'load_time_ms': 1.549, 'update_time_ms': 2.539}",280800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},280800,cda-server-3,-49.17351010815454
+7ffa6ff4607a442eb508661143530d5b,58124.95299601555,50.0,False,-66.7089208892692,235,5640,1756452543,-52.54500402832971,1566858,1200,2025-08-29_09-29-03,{},208.48485898971558,24,235,58124.95299601555,"{'num_steps_sampled': 282000, 'num_steps_trained': 282000, 'default': {'policy_loss': -0.11984744668006897, 'vf_explained_var': 0.9667076468467712, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 4.498193740844727, 'entropy': 12.998368263244629, 'kl': 0.014812729321420193, 'total_loss': 4.393343925476074}, 'sample_time_ms': 240340.756, 'grad_time_ms': 696.409, 'load_time_ms': 1.566, 'update_time_ms': 2.515}",282000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},282000,cda-server-3,-49.879847194777106
+7ffa6ff4607a442eb508661143530d5b,58354.85333657265,50.0,False,-63.120537966067694,236,5664,1756452773,-52.38867305401343,1566858,1200,2025-08-29_09-32-53,{},229.9003405570984,24,236,58354.85333657265,"{'num_steps_sampled': 283200, 'num_steps_trained': 283200, 'default': {'policy_loss': -0.12795832753181458, 'vf_explained_var': 0.9684709906578064, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 4.145485877990723, 'entropy': 12.91740894317627, 'kl': 0.01682090386748314, 'total_loss': 4.0345587730407715}, 'sample_time_ms': 237333.004, 'grad_time_ms': 697.025, 'load_time_ms': 1.646, 'update_time_ms': 2.515}",283200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},283200,cda-server-3,-49.41547090352766
+7ffa6ff4607a442eb508661143530d5b,58596.74061131477,50.0,False,-63.120537966067694,237,5688,1756453015,-52.378672504431236,1566858,1200,2025-08-29_09-36-55,{},241.88727474212646,24,237,58596.74061131477,"{'num_steps_sampled': 284400, 'num_steps_trained': 284400, 'default': {'policy_loss': -0.12086444348096848, 'vf_explained_var': 0.9703031182289124, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 4.048018932342529, 'entropy': 13.210933685302734, 'kl': 0.01684574969112873, 'total_loss': 3.944211006164551}, 'sample_time_ms': 236600.769, 'grad_time_ms': 697.81, 'load_time_ms': 1.621, 'update_time_ms': 2.511}",284400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},284400,cda-server-3,-49.41547090352766
+7ffa6ff4607a442eb508661143530d5b,58796.771169900894,50.0,False,-62.37009129837001,238,5712,1756453215,-52.37850576015482,1566858,1200,2025-08-29_09-40-15,{},200.0305585861206,24,238,58796.771169900894,"{'num_steps_sampled': 285600, 'num_steps_trained': 285600, 'default': {'policy_loss': -0.1238275095820427, 'vf_explained_var': 0.9677734375, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 4.313910484313965, 'entropy': 12.92751693725586, 'kl': 0.015617319382727146, 'total_loss': 4.205895900726318}, 'sample_time_ms': 228245.691, 'grad_time_ms': 697.581, 'load_time_ms': 1.699, 'update_time_ms': 2.532}",285600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},285600,cda-server-3,-49.41547090352766
+7ffa6ff4607a442eb508661143530d5b,59026.676966905594,50.0,False,-95.4942763001984,239,5736,1756453445,-52.737142631935086,1566858,1200,2025-08-29_09-44-05,{},229.9057970046997,24,239,59026.676966905594,"{'num_steps_sampled': 286800, 'num_steps_trained': 286800, 'default': {'policy_loss': -0.13358724117279053, 'vf_explained_var': 0.9385756254196167, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 12.510951042175293, 'entropy': 12.898584365844727, 'kl': 0.01375828217715025, 'total_loss': 12.391292572021484}, 'sample_time_ms': 231704.422, 'grad_time_ms': 697.294, 'load_time_ms': 1.688, 'update_time_ms': 2.559}",286800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},286800,cda-server-3,-49.21767791815008
+7ffa6ff4607a442eb508661143530d5b,59265.3185608387,50.0,False,-95.4942763001984,240,5760,1756453684,-52.989287994986306,1566858,1200,2025-08-29_09-48-04,{},238.64159393310547,24,240,59265.3185608387,"{'num_steps_sampled': 288000, 'num_steps_trained': 288000, 'default': {'policy_loss': -0.12578149139881134, 'vf_explained_var': 0.9672372341156006, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 4.648038864135742, 'entropy': 12.954557418823242, 'kl': 0.01652970165014267, 'total_loss': 4.5389933586120605}, 'sample_time_ms': 230634.254, 'grad_time_ms': 696.269, 'load_time_ms': 1.702, 'update_time_ms': 2.559}",288000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},288000,cda-server-3,-49.21767791815008
+7ffa6ff4607a442eb508661143530d5b,59519.89746642113,50.0,False,-106.44784318134155,241,5784,1756453938,-53.59184756133134,1566858,1200,2025-08-29_09-52-18,{},254.57890558242798,24,241,59519.89746642113,"{'num_steps_sampled': 289200, 'num_steps_trained': 289200, 'default': {'policy_loss': -0.11200863867998123, 'vf_explained_var': 0.9398728609085083, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 11.982730865478516, 'entropy': 12.876962661743164, 'kl': 0.012609120458364487, 'total_loss': 11.883487701416016}, 'sample_time_ms': 233197.913, 'grad_time_ms': 697.002, 'load_time_ms': 1.706, 'update_time_ms': 2.571}",289200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},289200,cda-server-3,-49.21767791815008
+7ffa6ff4607a442eb508661143530d5b,59724.84717440605,50.0,False,-106.44784318134155,242,5808,1756454143,-53.62086601566846,1566858,1200,2025-08-29_09-55-43,{},204.94970798492432,24,242,59724.84717440605,"{'num_steps_sampled': 290400, 'num_steps_trained': 290400, 'default': {'policy_loss': -0.12599098682403564, 'vf_explained_var': 0.9540507793426514, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 6.292912483215332, 'entropy': 12.756505966186523, 'kl': 0.015495683066546917, 'total_loss': 6.182610511779785}, 'sample_time_ms': 228172.987, 'grad_time_ms': 697.602, 'load_time_ms': 1.711, 'update_time_ms': 2.593}",290400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},290400,cda-server-3,-49.21767791815008
+7ffa6ff4607a442eb508661143530d5b,59956.89122271538,50.0,False,-106.44784318134155,243,5832,1756454375,-53.901326526581414,1566858,1200,2025-08-29_09-59-35,{},232.04404830932617,24,243,59956.89122271538,"{'num_steps_sampled': 291600, 'num_steps_trained': 291600, 'default': {'policy_loss': -0.12324307858943939, 'vf_explained_var': 0.9141952991485596, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 14.333452224731445, 'entropy': 13.002093315124512, 'kl': 0.014958103187382221, 'total_loss': 14.225353240966797}, 'sample_time_ms': 228052.492, 'grad_time_ms': 696.181, 'load_time_ms': 1.651, 'update_time_ms': 2.577}",291600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},291600,cda-server-3,-49.21767791815008
+7ffa6ff4607a442eb508661143530d5b,60170.40907239914,50.0,False,-106.44784318134155,244,5856,1756454589,-53.29756909889751,1566858,1200,2025-08-29_10-03-09,{},213.5178496837616,24,244,60170.40907239914,"{'num_steps_sampled': 292800, 'num_steps_trained': 292800, 'default': {'policy_loss': -0.13375505805015564, 'vf_explained_var': 0.97227942943573, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 3.7006442546844482, 'entropy': 12.993228912353516, 'kl': 0.016853027045726776, 'total_loss': 3.5839526653289795}, 'sample_time_ms': 224688.31, 'grad_time_ms': 696.174, 'load_time_ms': 1.645, 'update_time_ms': 2.576}",292800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},292800,cda-server-3,-49.240560247852144
+7ffa6ff4607a442eb508661143530d5b,60411.574466466904,50.0,False,-76.23910984773836,245,5880,1756454830,-52.81890130686282,1566858,1200,2025-08-29_10-07-10,{},241.16539406776428,24,245,60411.574466466904,"{'num_steps_sampled': 294000, 'num_steps_trained': 294000, 'default': {'policy_loss': -0.1345943808555603, 'vf_explained_var': 0.9743247628211975, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 3.4963855743408203, 'entropy': 12.831571578979492, 'kl': 0.016198769211769104, 'total_loss': 3.378192186355591}, 'sample_time_ms': 227954.699, 'grad_time_ms': 697.876, 'load_time_ms': 1.628, 'update_time_ms': 2.576}",294000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},294000,cda-server-3,-49.240560247852144
+7ffa6ff4607a442eb508661143530d5b,60602.57510614395,50.0,False,-76.23910984773836,246,5904,1756455021,-52.90135958003802,1566858,1200,2025-08-29_10-10-21,{},191.00063967704773,24,246,60602.57510614395,"{'num_steps_sampled': 295200, 'num_steps_trained': 295200, 'default': {'policy_loss': -0.1390654593706131, 'vf_explained_var': 0.9514430165290833, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 6.642275810241699, 'entropy': 12.809523582458496, 'kl': 0.018181614577770233, 'total_loss': 6.52161979675293}, 'sample_time_ms': 224064.105, 'grad_time_ms': 698.505, 'load_time_ms': 1.624, 'update_time_ms': 2.565}",295200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},295200,cda-server-3,-49.240560247852144
+7ffa6ff4607a442eb508661143530d5b,60830.046969652176,50.0,False,-65.48786138168421,247,5928,1756455248,-52.881967742395965,1566858,1200,2025-08-29_10-14-08,{},227.4718635082245,24,247,60830.046969652176,"{'num_steps_sampled': 296400, 'num_steps_trained': 296400, 'default': {'policy_loss': -0.13701820373535156, 'vf_explained_var': 0.9588128328323364, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 5.714297771453857, 'entropy': 12.85362720489502, 'kl': 0.017220674082636833, 'total_loss': 5.594715595245361}, 'sample_time_ms': 222621.545, 'grad_time_ms': 699.502, 'load_time_ms': 1.664, 'update_time_ms': 2.558}",296400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},296400,cda-server-3,-51.05344091696414
+7ffa6ff4607a442eb508661143530d5b,61065.82716369629,50.0,False,-65.48786138168421,248,5952,1756455484,-52.927681770163744,1566858,1200,2025-08-29_10-18-04,{},235.78019404411316,24,248,61065.82716369629,"{'num_steps_sampled': 297600, 'num_steps_trained': 297600, 'default': {'policy_loss': -0.1399531066417694, 'vf_explained_var': 0.9703459143638611, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 4.2092742919921875, 'entropy': 12.845855712890625, 'kl': 0.01608450338244438, 'total_loss': 4.085606575012207}, 'sample_time_ms': 226196.996, 'grad_time_ms': 699.192, 'load_time_ms': 1.588, 'update_time_ms': 2.532}",297600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},297600,cda-server-3,-51.05344091696414
+7ffa6ff4607a442eb508661143530d5b,61246.74543738365,50.0,False,-65.48786138168421,249,5976,1756455665,-52.78914995655172,1566858,1200,2025-08-29_10-21-05,{},180.91827368736267,24,249,61246.74543738365,"{'num_steps_sampled': 298800, 'num_steps_trained': 298800, 'default': {'policy_loss': -0.11524263024330139, 'vf_explained_var': 0.9645593166351318, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 4.863693714141846, 'entropy': 12.834324836730957, 'kl': 0.01413909625262022, 'total_loss': 4.762767314910889}, 'sample_time_ms': 221299.41, 'grad_time_ms': 698.133, 'load_time_ms': 1.508, 'update_time_ms': 2.56}",298800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},298800,cda-server-3,-50.57447261648545
+7ffa6ff4607a442eb508661143530d5b,61500.9609041214,50.0,False,-70.8772337757874,250,6000,1756455919,-52.827164561053394,1566858,1200,2025-08-29_10-25-19,{},254.2154667377472,24,250,61500.9609041214,"{'num_steps_sampled': 300000, 'num_steps_trained': 300000, 'default': {'policy_loss': -0.12152360379695892, 'vf_explained_var': 0.9502347111701965, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 8.935587882995605, 'entropy': 12.682344436645508, 'kl': 0.01290571317076683, 'total_loss': 8.827131271362305}, 'sample_time_ms': 222856.172, 'grad_time_ms': 698.697, 'load_time_ms': 1.507, 'update_time_ms': 2.556}",300000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},300000,cda-server-3,-49.381404257923435
+7ffa6ff4607a442eb508661143530d5b,61727.96933889389,50.0,False,-80.43938479448286,251,6024,1756456146,-52.932297495206534,1566858,1200,2025-08-29_10-29-06,{},227.00843477249146,24,251,61727.96933889389,"{'num_steps_sampled': 301200, 'num_steps_trained': 301200, 'default': {'policy_loss': -0.124129518866539, 'vf_explained_var': 0.9445521831512451, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 9.515353202819824, 'entropy': 12.694993019104004, 'kl': 0.013231638818979263, 'total_loss': 9.404621124267578}, 'sample_time_ms': 220099.801, 'grad_time_ms': 698.036, 'load_time_ms': 1.503, 'update_time_ms': 2.569}",301200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},301200,cda-server-3,-49.381404257923435
+7ffa6ff4607a442eb508661143530d5b,61935.81016087532,50.0,False,-83.02410042439696,252,6048,1756456354,-53.23362229005515,1566858,1200,2025-08-29_10-32-34,{},207.84082198143005,24,252,61935.81016087532,"{'num_steps_sampled': 302400, 'num_steps_trained': 302400, 'default': {'policy_loss': -0.1158803403377533, 'vf_explained_var': 0.8811068534851074, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 23.63001823425293, 'entropy': 12.623221397399902, 'kl': 0.012091527692973614, 'total_loss': 23.52638053894043}, 'sample_time_ms': 220389.057, 'grad_time_ms': 698.046, 'load_time_ms': 1.442, 'update_time_ms': 2.575}",302400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},302400,cda-server-3,-48.46283934118226
+7ffa6ff4607a442eb508661143530d5b,62156.379033088684,50.0,False,-83.02410042439696,253,6072,1756456575,-53.11197609884594,1566858,1200,2025-08-29_10-36-15,{},220.56887221336365,24,253,62156.379033088684,"{'num_steps_sampled': 303600, 'num_steps_trained': 303600, 'default': {'policy_loss': -0.12129177153110504, 'vf_explained_var': 0.9589307308197021, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 5.636325836181641, 'entropy': 12.391603469848633, 'kl': 0.016065770760178566, 'total_loss': 5.531301021575928}, 'sample_time_ms': 219241.656, 'grad_time_ms': 697.976, 'load_time_ms': 1.409, 'update_time_ms': 2.556}",303600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},303600,cda-server-3,-48.46283934118226
+7ffa6ff4607a442eb508661143530d5b,62395.67424201965,50.0,False,-83.02410042439696,254,6096,1756456814,-52.897401643507685,1566858,1200,2025-08-29_10-40-14,{},239.29520893096924,24,254,62395.67424201965,"{'num_steps_sampled': 304800, 'num_steps_trained': 304800, 'default': {'policy_loss': -0.13790710270404816, 'vf_explained_var': 0.9639573693275452, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 4.655649185180664, 'entropy': 12.867449760437012, 'kl': 0.016777753829956055, 'total_loss': 4.53472900390625}, 'sample_time_ms': 221819.661, 'grad_time_ms': 697.707, 'load_time_ms': 1.409, 'update_time_ms': 2.544}",304800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},304800,cda-server-3,-46.975067536221076
+7ffa6ff4607a442eb508661143530d5b,62574.46407747269,50.0,False,-83.02410042439696,255,6120,1756456993,-53.09295997154534,1566858,1200,2025-08-29_10-43-13,{},178.78983545303345,24,255,62574.46407747269,"{'num_steps_sampled': 306000, 'num_steps_trained': 306000, 'default': {'policy_loss': -0.1259656399488449, 'vf_explained_var': 0.9383307695388794, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 10.441603660583496, 'entropy': 12.83752727508545, 'kl': 0.012530826032161713, 'total_loss': 10.328326225280762}, 'sample_time_ms': 215582.737, 'grad_time_ms': 697.005, 'load_time_ms': 1.445, 'update_time_ms': 2.567}",306000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},306000,cda-server-3,-46.975067536221076
+7ffa6ff4607a442eb508661143530d5b,62805.72783088684,50.0,False,-83.02410042439696,256,6144,1756457224,-52.69068645877551,1566858,1200,2025-08-29_10-47-04,{},231.26375341415405,24,256,62805.72783088684,"{'num_steps_sampled': 307200, 'num_steps_trained': 307200, 'default': {'policy_loss': -0.11372081190347672, 'vf_explained_var': 0.9600616097450256, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 5.309256553649902, 'entropy': 12.717082977294922, 'kl': 0.015648726373910904, 'total_loss': 5.211379528045654}, 'sample_time_ms': 219609.757, 'grad_time_ms': 696.257, 'load_time_ms': 1.456, 'update_time_ms': 2.573}",307200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},307200,cda-server-3,-46.975067536221076
+7ffa6ff4607a442eb508661143530d5b,63022.77389717102,50.0,False,-76.95679255815752,257,6168,1756457441,-52.35233045584228,1566858,1200,2025-08-29_10-50-41,{},217.0460662841797,24,257,63022.77389717102,"{'num_steps_sampled': 308400, 'num_steps_trained': 308400, 'default': {'policy_loss': -0.14049550890922546, 'vf_explained_var': 0.9665980935096741, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 4.424448490142822, 'entropy': 12.783870697021484, 'kl': 0.015212688595056534, 'total_loss': 4.299355983734131}, 'sample_time_ms': 218569.116, 'grad_time_ms': 694.29, 'load_time_ms': 1.439, 'update_time_ms': 2.611}",308400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},308400,cda-server-3,-46.975067536221076
+7ffa6ff4607a442eb508661143530d5b,63204.996910095215,50.0,False,-76.95679255815752,258,6192,1756457623,-52.605614783542904,1566858,1200,2025-08-29_10-53-43,{},182.22301292419434,24,258,63204.996910095215,"{'num_steps_sampled': 309600, 'num_steps_trained': 309600, 'default': {'policy_loss': -0.11564840376377106, 'vf_explained_var': 0.9276121854782104, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 11.025399208068848, 'entropy': 12.666180610656738, 'kl': 0.01288242544978857, 'total_loss': 10.922794342041016}, 'sample_time_ms': 213212.693, 'grad_time_ms': 695.016, 'load_time_ms': 1.456, 'update_time_ms': 2.624}",309600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},309600,cda-server-3,-48.96675049775499
+7ffa6ff4607a442eb508661143530d5b,63413.304302453995,50.0,False,-79.38376949820108,259,6216,1756457832,-52.79132498828461,1566858,1200,2025-08-29_10-57-12,{},208.3073923587799,24,259,63413.304302453995,"{'num_steps_sampled': 310800, 'num_steps_trained': 310800, 'default': {'policy_loss': -0.13469654321670532, 'vf_explained_var': 0.9125310182571411, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 14.25716781616211, 'entropy': 12.729401588439941, 'kl': 0.014358220621943474, 'total_loss': 14.13701057434082}, 'sample_time_ms': 215951.335, 'grad_time_ms': 695.194, 'load_time_ms': 1.538, 'update_time_ms': 2.573}",310800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},310800,cda-server-3,-48.96675049775499
+7ffa6ff4607a442eb508661143530d5b,63619.37710595131,50.0,False,-79.38376949820108,260,6240,1756458038,-53.09396680432882,1566858,1200,2025-08-29_11-00-38,{},206.07280349731445,24,260,63619.37710595131,"{'num_steps_sampled': 312000, 'num_steps_trained': 312000, 'default': {'policy_loss': -0.12505127489566803, 'vf_explained_var': 0.9432923197746277, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 9.74711799621582, 'entropy': 12.539690017700195, 'kl': 0.013607031665742397, 'total_loss': 9.635843276977539}, 'sample_time_ms': 211137.637, 'grad_time_ms': 694.838, 'load_time_ms': 1.436, 'update_time_ms': 2.583}",312000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},312000,cda-server-3,-48.96675049775499
+7ffa6ff4607a442eb508661143530d5b,63809.00711917877,50.0,False,-79.38376949820108,261,6264,1756458227,-53.28716145224107,1566858,1200,2025-08-29_11-03-47,{},189.63001322746277,24,261,63809.00711917877,"{'num_steps_sampled': 313200, 'num_steps_trained': 313200, 'default': {'policy_loss': -0.12509626150131226, 'vf_explained_var': 0.9446278810501099, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 8.04684829711914, 'entropy': 12.705997467041016, 'kl': 0.014072345569729805, 'total_loss': 7.936000347137451}, 'sample_time_ms': 207399.587, 'grad_time_ms': 695.059, 'load_time_ms': 1.451, 'update_time_ms': 2.572}",313200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},313200,cda-server-3,-49.004492976462004
+7ffa6ff4607a442eb508661143530d5b,64035.38051056862,50.0,False,-79.38376949820108,262,6288,1756458454,-53.4176266055403,1566858,1200,2025-08-29_11-07-34,{},226.3733913898468,24,262,64035.38051056862,"{'num_steps_sampled': 314400, 'num_steps_trained': 314400, 'default': {'policy_loss': -0.13777390122413635, 'vf_explained_var': 0.9654431939125061, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 4.891932964324951, 'entropy': 12.455157279968262, 'kl': 0.015701068565249443, 'total_loss': 4.770056247711182}, 'sample_time_ms': 209252.26, 'grad_time_ms': 695.58, 'load_time_ms': 1.509, 'update_time_ms': 2.548}",314400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},314400,cda-server-3,-49.004492976462004
+7ffa6ff4607a442eb508661143530d5b,64277.10109376907,50.0,False,-83.8225622835028,263,6312,1756458696,-53.768203859822826,1566858,1200,2025-08-29_11-11-36,{},241.7205832004547,24,263,64277.10109376907,"{'num_steps_sampled': 315600, 'num_steps_trained': 315600, 'default': {'policy_loss': -0.11208131909370422, 'vf_explained_var': 0.8933252692222595, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 17.854284286499023, 'entropy': 12.356292724609375, 'kl': 0.01158389076590538, 'total_loss': 17.753929138183594}, 'sample_time_ms': 211366.547, 'grad_time_ms': 696.315, 'load_time_ms': 1.607, 'update_time_ms': 2.583}",315600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},315600,cda-server-3,-49.782612914095786
+7ffa6ff4607a442eb508661143530d5b,64485.63278698921,50.0,False,-83.8225622835028,264,6336,1756458904,-53.170355109342026,1566858,1200,2025-08-29_11-15-04,{},208.53169322013855,24,264,64485.63278698921,"{'num_steps_sampled': 316800, 'num_steps_trained': 316800, 'default': {'policy_loss': -0.14228513836860657, 'vf_explained_var': 0.9692507982254028, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 4.200347900390625, 'entropy': 12.575531005859375, 'kl': 0.01658741384744644, 'total_loss': 4.074857711791992}, 'sample_time_ms': 208290.047, 'grad_time_ms': 696.472, 'load_time_ms': 1.609, 'update_time_ms': 2.566}",316800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},316800,cda-server-3,-50.16941653944491
+7ffa6ff4607a442eb508661143530d5b,64703.80116915703,50.0,False,-83.8225622835028,265,6360,1756459122,-53.07160473149186,1566858,1200,2025-08-29_11-18-42,{},218.16838216781616,24,265,64703.80116915703,"{'num_steps_sampled': 318000, 'num_steps_trained': 318000, 'default': {'policy_loss': -0.13435477018356323, 'vf_explained_var': 0.9649702906608582, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 4.764406681060791, 'entropy': 12.417465209960938, 'kl': 0.015358511358499527, 'total_loss': 4.645602703094482}, 'sample_time_ms': 212228.495, 'grad_time_ms': 695.929, 'load_time_ms': 1.58, 'update_time_ms': 2.549}",318000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},318000,cda-server-3,-46.99046521985731
+7ffa6ff4607a442eb508661143530d5b,64931.42123794556,50.0,False,-83.8225622835028,266,6384,1756459350,-52.872884910836525,1566858,1200,2025-08-29_11-22-30,{},227.62006878852844,24,266,64931.42123794556,"{'num_steps_sampled': 319200, 'num_steps_trained': 319200, 'default': {'policy_loss': -0.12467009574174881, 'vf_explained_var': 0.97074294090271, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 3.9884033203125, 'entropy': 12.425530433654785, 'kl': 0.01615087501704693, 'total_loss': 3.8800861835479736}, 'sample_time_ms': 211864.165, 'grad_time_ms': 696.02, 'load_time_ms': 1.503, 'update_time_ms': 2.543}",319200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},319200,cda-server-3,-46.99046521985731
+7ffa6ff4607a442eb508661143530d5b,65124.32090330124,50.0,False,-63.57036311703964,267,6408,1756459543,-52.37472990453051,1566858,1200,2025-08-29_11-25-43,{},192.89966535568237,24,267,65124.32090330124,"{'num_steps_sampled': 320400, 'num_steps_trained': 320400, 'default': {'policy_loss': -0.13256537914276123, 'vf_explained_var': 0.957770586013794, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 5.696261405944824, 'entropy': 12.094939231872559, 'kl': 0.016220351681113243, 'total_loss': 5.580119609832764}, 'sample_time_ms': 209449.156, 'grad_time_ms': 696.343, 'load_time_ms': 1.515, 'update_time_ms': 2.54}",320400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},320400,cda-server-3,-46.99046521985731
+7ffa6ff4607a442eb508661143530d5b,65352.82435941696,50.0,False,-63.57036311703964,268,6432,1756459771,-52.32770520567257,1566858,1200,2025-08-29_11-29-31,{},228.50345611572266,24,268,65352.82435941696,"{'num_steps_sampled': 321600, 'num_steps_trained': 321600, 'default': {'policy_loss': -0.13483382761478424, 'vf_explained_var': 0.9603874683380127, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 5.443893909454346, 'entropy': 12.340784072875977, 'kl': 0.015873024240136147, 'total_loss': 5.325130939483643}, 'sample_time_ms': 214077.564, 'grad_time_ms': 695.929, 'load_time_ms': 1.495, 'update_time_ms': 2.539}",321600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},321600,cda-server-3,-46.99046521985731
+7ffa6ff4607a442eb508661143530d5b,65557.31867551804,50.0,False,-63.57036311703964,269,6456,1756459976,-52.411166516284226,1566858,1200,2025-08-29_11-32-56,{},204.49431610107422,24,269,65557.31867551804,"{'num_steps_sampled': 322800, 'num_steps_trained': 322800, 'default': {'policy_loss': -0.14043231308460236, 'vf_explained_var': 0.9675581455230713, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 4.18639612197876, 'entropy': 12.402804374694824, 'kl': 0.018088258802890778, 'total_loss': 4.064278602600098}, 'sample_time_ms': 213695.485, 'grad_time_ms': 696.66, 'load_time_ms': 1.494, 'update_time_ms': 2.578}",322800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},322800,cda-server-3,-49.284212041297145
+7ffa6ff4607a442eb508661143530d5b,65765.64012217522,50.0,False,-84.43411533360964,270,6480,1756460184,-52.88237131219012,1566858,1200,2025-08-29_11-36-24,{},208.3214466571808,24,270,65765.64012217522,"{'num_steps_sampled': 324000, 'num_steps_trained': 324000, 'default': {'policy_loss': -0.1212284192442894, 'vf_explained_var': 0.9362192153930664, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 11.846854209899902, 'entropy': 12.34295654296875, 'kl': 0.01328012440353632, 'total_loss': 11.7390718460083}, 'sample_time_ms': 213920.267, 'grad_time_ms': 696.66, 'load_time_ms': 1.539, 'update_time_ms': 2.571}",324000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},324000,cda-server-3,-49.284212041297145
+7ffa6ff4607a442eb508661143530d5b,65988.13902163506,50.0,False,-84.43411533360964,271,6504,1756460407,-52.73400011488515,1566858,1200,2025-08-29_11-40-07,{},222.49889945983887,24,271,65988.13902163506,"{'num_steps_sampled': 325200, 'num_steps_trained': 325200, 'default': {'policy_loss': -0.12005000561475754, 'vf_explained_var': 0.9790047407150269, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 2.753878593444824, 'entropy': 12.271784782409668, 'kl': 0.017051290720701218, 'total_loss': 2.651093006134033}, 'sample_time_ms': 217207.977, 'grad_time_ms': 696.019, 'load_time_ms': 1.434, 'update_time_ms': 2.564}",325200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},325200,cda-server-3,-49.284212041297145
+7ffa6ff4607a442eb508661143530d5b,66196.45666050911,50.0,False,-84.43411533360964,272,6528,1756460615,-53.14926512060034,1566858,1200,2025-08-29_11-43-35,{},208.31763887405396,24,272,66196.45666050911,"{'num_steps_sampled': 326400, 'num_steps_trained': 326400, 'default': {'policy_loss': -0.12172228842973709, 'vf_explained_var': 0.941199004650116, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 8.98120403289795, 'entropy': 12.484696388244629, 'kl': 0.015201661735773087, 'total_loss': 8.874873161315918}, 'sample_time_ms': 215402.317, 'grad_time_ms': 696.168, 'load_time_ms': 1.386, 'update_time_ms': 2.557}",326400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},326400,cda-server-3,-50.37665546265208
+7ffa6ff4607a442eb508661143530d5b,66390.12393069267,50.0,False,-84.43411533360964,273,6552,1756460809,-53.24081358545004,1566858,1200,2025-08-29_11-46-49,{},193.66727018356323,24,273,66390.12393069267,"{'num_steps_sampled': 327600, 'num_steps_trained': 327600, 'default': {'policy_loss': -0.1288047730922699, 'vf_explained_var': 0.9695960283279419, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 4.136238098144531, 'entropy': 12.348140716552734, 'kl': 0.016651269048452377, 'total_loss': 4.024292945861816}, 'sample_time_ms': 210596.173, 'grad_time_ms': 696.955, 'load_time_ms': 1.392, 'update_time_ms': 2.56}",327600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},327600,cda-server-3,-50.37665546265208
+7ffa6ff4607a442eb508661143530d5b,66652.66490268707,50.0,False,-78.3542301798375,274,6576,1756461071,-52.8686931909312,1566858,1200,2025-08-29_11-51-11,{},262.5409719944,24,274,66652.66490268707,"{'num_steps_sampled': 328800, 'num_steps_trained': 328800, 'default': {'policy_loss': -0.140442356467247, 'vf_explained_var': 0.96639484167099, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 4.548933506011963, 'entropy': 12.452632904052734, 'kl': 0.018309663981199265, 'total_loss': 4.427030086517334}, 'sample_time_ms': 215996.626, 'grad_time_ms': 697.463, 'load_time_ms': 1.403, 'update_time_ms': 2.545}",328800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},328800,cda-server-3,-50.37665546265208
+7ffa6ff4607a442eb508661143530d5b,66875.22850847244,50.0,False,-78.3542301798375,275,6600,1756461294,-52.76412100550012,1566858,1200,2025-08-29_11-54-54,{},222.56360578536987,24,275,66875.22850847244,"{'num_steps_sampled': 330000, 'num_steps_trained': 330000, 'default': {'policy_loss': -0.13039201498031616, 'vf_explained_var': 0.9707435965538025, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 3.792579174041748, 'entropy': 12.545645713806152, 'kl': 0.017142174765467644, 'total_loss': 3.6795437335968018}, 'sample_time_ms': 216436.407, 'grad_time_ms': 697.167, 'load_time_ms': 1.409, 'update_time_ms': 2.529}",330000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},330000,cda-server-3,-50.67591107693649
+7ffa6ff4607a442eb508661143530d5b,67104.32276844978,50.0,False,-71.39806989782852,276,6624,1756461523,-52.62207614615324,1566858,1200,2025-08-29_11-58-43,{},229.0942599773407,24,276,67104.32276844978,"{'num_steps_sampled': 331200, 'num_steps_trained': 331200, 'default': {'policy_loss': -0.1321687251329422, 'vf_explained_var': 0.9725034236907959, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 4.033144474029541, 'entropy': 12.464203834533691, 'kl': 0.01608506217598915, 'total_loss': 3.917262077331543}, 'sample_time_ms': 216582.319, 'grad_time_ms': 698.56, 'load_time_ms': 1.5, 'update_time_ms': 2.511}",331200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},331200,cda-server-3,-50.67591107693649
+7ffa6ff4607a442eb508661143530d5b,67301.52805280685,50.0,False,-86.9196750907215,277,6648,1756461720,-53.180265980102625,1566858,1200,2025-08-29_12-02-00,{},197.20528435707092,24,277,67301.52805280685,"{'num_steps_sampled': 332400, 'num_steps_trained': 332400, 'default': {'policy_loss': -0.1267719715833664, 'vf_explained_var': 0.9339027404785156, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 13.914371490478516, 'entropy': 12.239913940429688, 'kl': 0.014391104690730572, 'total_loss': 13.80217170715332}, 'sample_time_ms': 217012.603, 'grad_time_ms': 698.911, 'load_time_ms': 1.494, 'update_time_ms': 2.502}",332400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},332400,cda-server-3,-49.05128504421615
+7ffa6ff4607a442eb508661143530d5b,67517.49462890625,50.0,False,-86.9196750907215,278,6672,1756461936,-53.126583249710436,1566858,1200,2025-08-29_12-05-36,{},215.96657609939575,24,278,67517.49462890625,"{'num_steps_sampled': 333600, 'num_steps_trained': 333600, 'default': {'policy_loss': -0.1337561011314392, 'vf_explained_var': 0.957696259021759, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 6.064571380615234, 'entropy': 12.279397964477539, 'kl': 0.01595686562359333, 'total_loss': 5.946971893310547}, 'sample_time_ms': 215758.99, 'grad_time_ms': 698.815, 'load_time_ms': 1.508, 'update_time_ms': 2.525}",333600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},333600,cda-server-3,-49.05128504421615
+7ffa6ff4607a442eb508661143530d5b,67772.74753212929,50.0,False,-86.9196750907215,279,6696,1756462191,-53.49961996527838,1566858,1200,2025-08-29_12-09-51,{},255.25290322303772,24,279,67772.74753212929,"{'num_steps_sampled': 334800, 'num_steps_trained': 334800, 'default': {'policy_loss': -0.1112731322646141, 'vf_explained_var': 0.9488842487335205, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 7.727088928222656, 'entropy': 12.261554718017578, 'kl': 0.013475686311721802, 'total_loss': 7.629459857940674}, 'sample_time_ms': 220834.503, 'grad_time_ms': 699.083, 'load_time_ms': 1.506, 'update_time_ms': 2.619}",334800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},334800,cda-server-3,-49.05128504421615
+7ffa6ff4607a442eb508661143530d5b,67992.6490688324,50.0,False,-86.9196750907215,280,6720,1756462411,-53.34118500330564,1566858,1200,2025-08-29_12-13-31,{},219.90153670310974,24,280,67992.6490688324,"{'num_steps_sampled': 336000, 'num_steps_trained': 336000, 'default': {'policy_loss': -0.14288152754306793, 'vf_explained_var': 0.9715897440910339, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 3.7593188285827637, 'entropy': 12.291492462158203, 'kl': 0.016825037077069283, 'total_loss': 3.6334729194641113}, 'sample_time_ms': 221991.215, 'grad_time_ms': 700.27, 'load_time_ms': 1.564, 'update_time_ms': 2.64}",336000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},336000,cda-server-3,-49.05128504421615
+7ffa6ff4607a442eb508661143530d5b,68177.69842720032,50.0,False,-72.29489230435841,281,6744,1756462596,-52.63661777018459,1566858,1200,2025-08-29_12-16-36,{},185.04935836791992,24,281,68177.69842720032,"{'num_steps_sampled': 337200, 'num_steps_trained': 337200, 'default': {'policy_loss': -0.12455210089683533, 'vf_explained_var': 0.972270667552948, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 3.6177749633789062, 'entropy': 12.42072582244873, 'kl': 0.015108318999409676, 'total_loss': 3.5085201263427734}, 'sample_time_ms': 218244.897, 'grad_time_ms': 701.468, 'load_time_ms': 1.665, 'update_time_ms': 2.651}",337200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},337200,cda-server-3,-51.17460335793359
+7ffa6ff4607a442eb508661143530d5b,68388.46821856499,50.0,False,-78.03348874757802,282,6768,1756462807,-52.87258603097264,1566858,1200,2025-08-29_12-20-07,{},210.7697913646698,24,282,68388.46821856499,"{'num_steps_sampled': 338400, 'num_steps_trained': 338400, 'default': {'policy_loss': -0.11916964501142502, 'vf_explained_var': 0.9402625560760498, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 11.273205757141113, 'entropy': 12.366950988769531, 'kl': 0.012861553579568863, 'total_loss': 11.167058944702148}, 'sample_time_ms': 218490.18, 'grad_time_ms': 701.265, 'load_time_ms': 1.714, 'update_time_ms': 2.677}",338400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},338400,cda-server-3,-51.17460335793359
+7ffa6ff4607a442eb508661143530d5b,68587.79872131348,50.0,False,-78.03348874757802,283,6792,1756463006,-52.52309877177729,1566858,1200,2025-08-29_12-23-26,{},199.33050274848938,24,283,68587.79872131348,"{'num_steps_sampled': 339600, 'num_steps_trained': 339600, 'default': {'policy_loss': -0.13121232390403748, 'vf_explained_var': 0.9545206427574158, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 6.784745693206787, 'entropy': 12.493606567382812, 'kl': 0.014576302841305733, 'total_loss': 6.668292045593262}, 'sample_time_ms': 219056.931, 'grad_time_ms': 700.902, 'load_time_ms': 1.7, 'update_time_ms': 2.642}",339600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},339600,cda-server-3,-48.200563271012534
+7ffa6ff4607a442eb508661143530d5b,68831.42337942123,50.0,False,-78.03348874757802,284,6816,1756463250,-52.34025074477718,1566858,1200,2025-08-29_12-27-30,{},243.62465810775757,24,284,68831.42337942123,"{'num_steps_sampled': 340800, 'num_steps_trained': 340800, 'default': {'policy_loss': -0.1302146315574646, 'vf_explained_var': 0.9693320393562317, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 3.9507997035980225, 'entropy': 12.259281158447266, 'kl': 0.015657953917980194, 'total_loss': 3.8364388942718506}, 'sample_time_ms': 217165.955, 'grad_time_ms': 700.227, 'load_time_ms': 1.689, 'update_time_ms': 2.648}",340800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},340800,cda-server-3,-48.200563271012534
+7ffa6ff4607a442eb508661143530d5b,69071.45431423187,50.0,False,-78.03348874757802,285,6840,1756463490,-52.354673693428815,1566858,1200,2025-08-29_12-31-30,{},240.03093481063843,24,285,69071.45431423187,"{'num_steps_sampled': 342000, 'num_steps_trained': 342000, 'default': {'policy_loss': -0.1259268820285797, 'vf_explained_var': 0.9753335118293762, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 3.3924663066864014, 'entropy': 12.422459602355957, 'kl': 0.016515301540493965, 'total_loss': 3.283261299133301}, 'sample_time_ms': 218912.421, 'grad_time_ms': 700.466, 'load_time_ms': 1.708, 'update_time_ms': 2.687}",342000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},342000,cda-server-3,-48.200563271012534
+7ffa6ff4607a442eb508661143530d5b,69299.64997696877,50.0,False,-58.96335390541665,286,6864,1756463718,-52.059089461532785,1566858,1200,2025-08-29_12-35-18,{},228.1956627368927,24,286,69299.64997696877,"{'num_steps_sampled': 343200, 'num_steps_trained': 343200, 'default': {'policy_loss': -0.13080231845378876, 'vf_explained_var': 0.9585863947868347, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 5.377427101135254, 'entropy': 12.568329811096191, 'kl': 0.016125712543725967, 'total_loss': 5.262951850891113}, 'sample_time_ms': 218823.497, 'grad_time_ms': 699.55, 'load_time_ms': 1.7, 'update_time_ms': 2.679}",343200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},343200,cda-server-3,-48.200563271012534
+7ffa6ff4607a442eb508661143530d5b,69511.73801374435,50.0,False,-58.96335390541665,287,6888,1756463930,-52.12903332033729,1566858,1200,2025-08-29_12-38-50,{},212.088036775589,24,287,69511.73801374435,"{'num_steps_sampled': 344400, 'num_steps_trained': 344400, 'default': {'policy_loss': -0.12991659343242645, 'vf_explained_var': 0.9701218008995056, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 3.9993748664855957, 'entropy': 12.348908424377441, 'kl': 0.014908598735928535, 'total_loss': 3.8845536708831787}, 'sample_time_ms': 220312.014, 'grad_time_ms': 699.283, 'load_time_ms': 1.701, 'update_time_ms': 2.656}",344400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},344400,cda-server-3,-49.00580362206023
+7ffa6ff4607a442eb508661143530d5b,69728.40817785263,50.0,False,-58.96335390541665,288,6912,1756464147,-52.207700063283,1566858,1200,2025-08-29_12-42-27,{},216.67016410827637,24,288,69728.40817785263,"{'num_steps_sampled': 345600, 'num_steps_trained': 345600, 'default': {'policy_loss': -0.13974148035049438, 'vf_explained_var': 0.9652450084686279, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 4.558967590332031, 'entropy': 12.57193374633789, 'kl': 0.017402615398168564, 'total_loss': 4.4368462562561035}, 'sample_time_ms': 220382.521, 'grad_time_ms': 699.166, 'load_time_ms': 1.696, 'update_time_ms': 2.646}",345600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},345600,cda-server-3,-48.96425296443912
+7ffa6ff4607a442eb508661143530d5b,69953.82830810547,50.0,False,-93.07593011966938,289,6936,1756464372,-52.56730745048848,1566858,1200,2025-08-29_12-46-12,{},225.42013025283813,24,289,69953.82830810547,"{'num_steps_sampled': 346800, 'num_steps_trained': 346800, 'default': {'policy_loss': -0.10083112120628357, 'vf_explained_var': 0.8962631225585938, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 20.528160095214844, 'entropy': 12.43372631072998, 'kl': 0.011682498268783092, 'total_loss': 20.439159393310547}, 'sample_time_ms': 217399.421, 'grad_time_ms': 699.073, 'load_time_ms': 1.708, 'update_time_ms': 2.537}",346800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},346800,cda-server-3,-48.96425296443912
+7ffa6ff4607a442eb508661143530d5b,70200.05345344543,50.0,False,-93.07593011966938,290,6960,1756464619,-53.04621499979686,1566858,1200,2025-08-29_12-50-19,{},246.22514533996582,24,290,70200.05345344543,"{'num_steps_sampled': 348000, 'num_steps_trained': 348000, 'default': {'policy_loss': -0.11693020910024643, 'vf_explained_var': 0.8908771872520447, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 26.90326690673828, 'entropy': 12.324344635009766, 'kl': 0.011365074664354324, 'total_loss': 26.79784393310547}, 'sample_time_ms': 220032.701, 'grad_time_ms': 698.208, 'load_time_ms': 1.683, 'update_time_ms': 2.529}",348000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},348000,cda-server-3,-48.96425296443912
+7ffa6ff4607a442eb508661143530d5b,70410.31812143326,50.0,False,-93.07593011966938,291,6984,1756464829,-53.13178389336556,1566858,1200,2025-08-29_12-53-49,{},210.2646679878235,24,291,70410.31812143326,"{'num_steps_sampled': 349200, 'num_steps_trained': 349200, 'default': {'policy_loss': -0.13924799859523773, 'vf_explained_var': 0.967040479183197, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 4.398654937744141, 'entropy': 12.41601848602295, 'kl': 0.01773855648934841, 'total_loss': 4.27736759185791}, 'sample_time_ms': 222554.442, 'grad_time_ms': 697.992, 'load_time_ms': 1.693, 'update_time_ms': 2.536}",349200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},349200,cda-server-3,-48.96425296443912
+7ffa6ff4607a442eb508661143530d5b,70627.03892922401,50.0,False,-93.07593011966938,292,7008,1756465046,-53.17242746128206,1566858,1200,2025-08-29_12-57-26,{},216.72080779075623,24,292,70627.03892922401,"{'num_steps_sampled': 350400, 'num_steps_trained': 350400, 'default': {'policy_loss': -0.12980133295059204, 'vf_explained_var': 0.968231737613678, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 4.2248711585998535, 'entropy': 12.26395034790039, 'kl': 0.01560777798295021, 'total_loss': 4.110872745513916}, 'sample_time_ms': 223150.609, 'grad_time_ms': 697.043, 'load_time_ms': 1.661, 'update_time_ms': 2.506}",350400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},350400,cda-server-3,-49.349361346491975
+7ffa6ff4607a442eb508661143530d5b,70878.18247318268,50.0,False,-90.1025840196358,293,7032,1756465297,-52.778861706694286,1566858,1200,2025-08-29_13-01-37,{},251.14354395866394,24,293,70878.18247318268,"{'num_steps_sampled': 351600, 'num_steps_trained': 351600, 'default': {'policy_loss': -0.14350180327892303, 'vf_explained_var': 0.9764432907104492, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 3.0745840072631836, 'entropy': 12.347380638122559, 'kl': 0.016932280734181404, 'total_loss': 2.9482264518737793}, 'sample_time_ms': 228332.209, 'grad_time_ms': 696.889, 'load_time_ms': 1.593, 'update_time_ms': 2.5}",351600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},351600,cda-server-3,-49.349361346491975
+7ffa6ff4607a442eb508661143530d5b,71095.06284427643,50.0,False,-61.96288074410334,294,7056,1756465514,-52.413159036952635,1566858,1200,2025-08-29_13-05-14,{},216.88037109375,24,294,71095.06284427643,"{'num_steps_sampled': 352800, 'num_steps_trained': 352800, 'default': {'policy_loss': -0.1359146684408188, 'vf_explained_var': 0.9747341871261597, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 3.547532796859741, 'entropy': 12.351963996887207, 'kl': 0.01467402745038271, 'total_loss': 3.4264755249023438}, 'sample_time_ms': 225658.221, 'grad_time_ms': 696.484, 'load_time_ms': 1.581, 'update_time_ms': 2.531}",352800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},352800,cda-server-3,-50.951736905180546
+7ffa6ff4607a442eb508661143530d5b,71324.26915335655,50.0,False,-58.954857103882475,295,7080,1756465743,-52.2415230325622,1566858,1200,2025-08-29_13-09-03,{},229.2063090801239,24,295,71324.26915335655,"{'num_steps_sampled': 354000, 'num_steps_trained': 354000, 'default': {'policy_loss': -0.13213542103767395, 'vf_explained_var': 0.9717539548873901, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 3.67763352394104, 'entropy': 12.306523323059082, 'kl': 0.013029721565544605, 'total_loss': 3.5586907863616943}, 'sample_time_ms': 224574.938, 'grad_time_ms': 697.288, 'load_time_ms': 1.568, 'update_time_ms': 2.525}",354000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},354000,cda-server-3,-50.02923476252851
+7ffa6ff4607a442eb508661143530d5b,71543.36113262177,50.0,False,-56.309158459773386,296,7104,1756465962,-52.17289624307936,1566858,1200,2025-08-29_13-12-42,{},219.091979265213,24,296,71543.36113262177,"{'num_steps_sampled': 355200, 'num_steps_trained': 355200, 'default': {'policy_loss': -0.1349836140871048, 'vf_explained_var': 0.9557469487190247, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 5.835244178771973, 'entropy': 12.222107887268066, 'kl': 0.01717188209295273, 'total_loss': 5.717647075653076}, 'sample_time_ms': 223664.96, 'grad_time_ms': 696.93, 'load_time_ms': 1.529, 'update_time_ms': 2.564}",355200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},355200,cda-server-3,-46.895097690446974
+7ffa6ff4607a442eb508661143530d5b,71764.0941464901,50.0,False,-96.42450975252484,297,7128,1756466183,-52.551490195022886,1566858,1200,2025-08-29_13-16-23,{},220.7330138683319,24,297,71764.0941464901,"{'num_steps_sampled': 356400, 'num_steps_trained': 356400, 'default': {'policy_loss': -0.10262128710746765, 'vf_explained_var': 0.9111029505729675, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 16.357921600341797, 'entropy': 12.320260047912598, 'kl': 0.011174225248396397, 'total_loss': 16.266613006591797}, 'sample_time_ms': 224530.074, 'grad_time_ms': 696.329, 'load_time_ms': 1.538, 'update_time_ms': 2.562}",356400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},356400,cda-server-3,-46.895097690446974
+7ffa6ff4607a442eb508661143530d5b,71956.04703903198,50.0,False,-96.42450975252484,298,7152,1756466375,-52.72924235526778,1566858,1200,2025-08-29_13-19-35,{},191.95289254188538,24,298,71956.04703903198,"{'num_steps_sampled': 357600, 'num_steps_trained': 357600, 'default': {'policy_loss': -0.13917165994644165, 'vf_explained_var': 0.9431633353233337, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 8.362993240356445, 'entropy': 12.179462432861328, 'kl': 0.016141919419169426, 'total_loss': 8.240165710449219}, 'sample_time_ms': 222058.576, 'grad_time_ms': 696.136, 'load_time_ms': 1.528, 'update_time_ms': 2.57}",357600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},357600,cda-server-3,-46.895097690446974
+7ffa6ff4607a442eb508661143530d5b,72197.96976613998,50.0,False,-96.42450975252484,299,7176,1756466617,-53.04186033195124,1566858,1200,2025-08-29_13-23-37,{},241.9227271080017,24,299,72197.96976613998,"{'num_steps_sampled': 358800, 'num_steps_trained': 358800, 'default': {'policy_loss': -0.13167642056941986, 'vf_explained_var': 0.9527842402458191, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 7.93591833114624, 'entropy': 12.2572660446167, 'kl': 0.013497140258550644, 'total_loss': 7.81790828704834}, 'sample_time_ms': 223709.85, 'grad_time_ms': 695.264, 'load_time_ms': 1.463, 'update_time_ms': 2.553}",358800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},358800,cda-server-3,-46.895097690446974
+7ffa6ff4607a442eb508661143530d5b,72459.24091768265,50.0,False,-99.06848104185677,300,7200,1756466878,-53.682056333677174,1566858,1200,2025-08-29_13-27-58,{},261.2711515426636,24,300,72459.24091768265,"{'num_steps_sampled': 360000, 'num_steps_trained': 360000, 'default': {'policy_loss': -0.13141396641731262, 'vf_explained_var': 0.9265610575675964, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 13.346891403198242, 'entropy': 12.274971961975098, 'kl': 0.015094866044819355, 'total_loss': 13.230761528015137}, 'sample_time_ms': 225215.112, 'grad_time_ms': 694.581, 'load_time_ms': 1.466, 'update_time_ms': 2.564}",360000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},360000,cda-server-3,-48.35379212325632
+7ffa6ff4607a442eb508661143530d5b,72683.35219526291,50.0,False,-99.17453996516333,301,7224,1756467102,-53.750167833270154,1566858,1200,2025-08-29_13-31-42,{},224.11127758026123,24,301,72683.35219526291,"{'num_steps_sampled': 361200, 'num_steps_trained': 361200, 'default': {'policy_loss': -0.1376352310180664, 'vf_explained_var': 0.9457657933235168, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 9.347111701965332, 'entropy': 12.26346206665039, 'kl': 0.01422292459756136, 'total_loss': 9.223877906799316}, 'sample_time_ms': 226600.323, 'grad_time_ms': 694.031, 'load_time_ms': 1.449, 'update_time_ms': 2.571}",361200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},361200,cda-server-3,-48.35379212325632
+7ffa6ff4607a442eb508661143530d5b,72948.92363882065,50.0,False,-99.17453996516333,302,7248,1756467368,-53.607525853227514,1566858,1200,2025-08-29_13-36-08,{},265.57144355773926,24,302,72948.92363882065,"{'num_steps_sampled': 362400, 'num_steps_trained': 362400, 'default': {'policy_loss': -0.1350909024477005, 'vf_explained_var': 0.9636004567146301, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 4.889998435974121, 'entropy': 12.176090240478516, 'kl': 0.01710471510887146, 'total_loss': 4.772226333618164}, 'sample_time_ms': 231485.525, 'grad_time_ms': 693.899, 'load_time_ms': 1.453, 'update_time_ms': 2.591}",362400,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},362400,cda-server-3,-48.35379212325632
+7ffa6ff4607a442eb508661143530d5b,73200.5001718998,50.0,False,-99.17453996516333,303,7272,1756467619,-53.29212436288077,1566858,1200,2025-08-29_13-40-19,{},251.57653307914734,24,303,73200.5001718998,"{'num_steps_sampled': 363600, 'num_steps_trained': 363600, 'default': {'policy_loss': -0.1493072360754013, 'vf_explained_var': 0.9692809581756592, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 4.034963607788086, 'entropy': 12.088041305541992, 'kl': 0.016667162999510765, 'total_loss': 3.902531623840332}, 'sample_time_ms': 231528.872, 'grad_time_ms': 693.734, 'load_time_ms': 1.515, 'update_time_ms': 2.606}",363600,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},363600,cda-server-3,-50.15217415635844
+7ffa6ff4607a442eb508661143530d5b,73480.11277294159,50.0,False,-99.17453996516333,304,7296,1756467899,-52.89721532106234,1566858,1200,2025-08-29_13-44-59,{},279.6126010417938,24,304,73480.11277294159,"{'num_steps_sampled': 364800, 'num_steps_trained': 364800, 'default': {'policy_loss': -0.11796130239963531, 'vf_explained_var': 0.9274365305900574, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 10.442138671875, 'entropy': 12.077159881591797, 'kl': 0.013736975379288197, 'total_loss': 10.338085174560547}, 'sample_time_ms': 237800.827, 'grad_time_ms': 694.929, 'load_time_ms': 1.529, 'update_time_ms': 2.62}",364800,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},364800,cda-server-3,-49.92233445051167
+7ffa6ff4607a442eb508661143530d5b,73758.79197740555,50.0,False,-67.96758924730126,305,7320,1756468178,-52.48637423160558,1566858,1200,2025-08-29_13-49-38,{},278.67920446395874,24,305,73758.79197740555,"{'num_steps_sampled': 366000, 'num_steps_trained': 366000, 'default': {'policy_loss': -0.12833836674690247, 'vf_explained_var': 0.9781603217124939, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 2.9315381050109863, 'entropy': 11.94629955291748, 'kl': 0.015499631874263287, 'total_loss': 2.8188929557800293}, 'sample_time_ms': 242748.703, 'grad_time_ms': 694.368, 'load_time_ms': 1.537, 'update_time_ms': 2.623}",366000,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},366000,cda-server-3,-49.92233445051167
+7ffa6ff4607a442eb508661143530d5b,73987.31811928749,50.0,False,-67.96758924730126,306,7344,1756468406,-52.524258915937445,1566858,1200,2025-08-29_13-53-26,{},228.52614188194275,24,306,73987.31811928749,"{'num_steps_sampled': 367200, 'num_steps_trained': 367200, 'default': {'policy_loss': -0.12210477888584137, 'vf_explained_var': 0.9534997940063477, 'cur_lr': 4.999999873689376e-05, 'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 6.509614944458008, 'entropy': 11.96830940246582, 'kl': 0.012901661917567253, 'total_loss': 6.400572776794434}, 'sample_time_ms': 243691.988, 'grad_time_ms': 694.451, 'load_time_ms': 1.575, 'update_time_ms': 2.593}",367200,"{'input': 'sampler', 'simple_optimizer': False, 'tf_session_args': {'allow_soft_placement': True, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'intra_op_parallelism_threads': 2}, 'postprocess_inputs': False, 'observation_filter': 'MeanStdFilter', 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'num_gpus': 0, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'grad_clip': None, 'vf_loss_coeff': 1.0, 'gamma': 0.99, 'input_evaluation': None, 'sample_async': False, 'vf_share_layers': False, 'sample_batch_size': 200, 'output_max_file_size': 67108864, 'preprocessor_pref': 'deepmind', 'env_config': {'generalize': True, 'run_valid': False}, 'clip_actions': True, 'kl_coeff': 0.2, 'num_envs_per_worker': 1, 'monitor': False, 'kl_target': 0.01, 'env': 'LEDRO_D_FC', 'lr_schedule': None, 'lr': 5e-05, 'collect_metrics_timeout': 180, 'lambda': 1.0, 'num_workers': 3, 'num_cpus_for_driver': 1, 'custom_resources_per_worker': {}, 'multiagent': {'policy_graphs': {}, 'policies_to_train': None, 'policy_mapping_fn': None}, 'train_batch_size': 1200, 'callbacks': {'on_episode_step': None, 'on_sample_end': None, 'on_episode_start': None, 'on_train_result': None, 'on_episode_end': None}, 'optimizer': {}, 
'num_gpus_per_worker': 0, 'entropy_coeff': 0.0, 'num_cpus_per_worker': 1, 'synchronize_filters': True, 'output_compress_columns': ['obs', 'new_obs'], 'clip_rewards': None, 'log_level': 'INFO', 'compress_observations': False, 'model': {'conv_activation': 'relu', 'free_log_std': False, 'fcnet_activation': 'tanh', 'lstm_use_prev_action_reward': False, 'conv_filters': None, 'lstm_cell_size': 256, 'use_lstm': False, 'grayscale': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'custom_options': {}, 'framestack': True, 'custom_preprocessor': None, 'custom_model': None, 'zero_mean': True, 'squash_to_range': False}, 'batch_mode': 'truncate_episodes', 'vf_clip_param': 10.0, 'num_sgd_iter': 30, 'horizon': 50, 'straggler_mitigation': False, 'output': None, 'use_gae': True}",10.157.146.3,0,{},367200,cda-server-3,-48.9968998602918
diff --git a/experiments/ledro_d_fc_7nm_run3/PPO_LEDRO_D_FC_0_2025-08-28_17-19-59h7o8x_d0/result.json b/experiments/ledro_d_fc_7nm_run3/PPO_LEDRO_D_FC_0_2025-08-28_17-19-59h7o8x_d0/result.json
new file mode 100644
index 0000000..2b080c2
--- /dev/null
+++ b/experiments/ledro_d_fc_7nm_run3/PPO_LEDRO_D_FC_0_2025-08-28_17-19-59h7o8x_d0/result.json
@@ -0,0 +1,306 @@
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 231.39491868019104, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -146.78393839650298, "iterations_since_restore": 1, "episodes_total": 24, "timestamp": 1756394647, "episode_reward_mean": -129.08330393143353, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_17-24-07", "policy_reward_mean": {}, "time_this_iter_s": 231.39491868019104, "episodes_this_iter": 24, "training_iteration": 1, "time_total_s": 231.39491868019104, "info": {"num_steps_sampled": 1200, "num_steps_trained": 1200, "default": {"policy_loss": -0.12120606005191803, "vf_explained_var": 0.018705738708376884, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 0.20000000298023224, "vf_loss": 4254.23876953125, "entropy": 18.654157638549805, "kl": 0.02240253984928131, "total_loss": 4254.12255859375}, "sample_time_ms": 226832.15, "grad_time_ms": 2279.741, "load_time_ms": 148.38, "update_time_ms": 2016.317}, "timesteps_total": 1200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 1200, "hostname": "cda-server-3", "episode_reward_max": -99.99993258306239}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 439.889981508255, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -146.78393839650298, "iterations_since_restore": 2, "episodes_total": 48, "timestamp": 1756394856, "episode_reward_mean": -127.32490473992193, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_17-27-36", "policy_reward_mean": {}, "time_this_iter_s": 208.49506282806396, "episodes_this_iter": 24, "training_iteration": 2, "time_total_s": 439.889981508255, "info": {"num_steps_sampled": 2400, "num_steps_trained": 2400, "default": {"policy_loss": -0.12993724644184113, "vf_explained_var": 0.11479301005601883, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 0.30000001192092896, "vf_loss": 3653.26953125, "entropy": 18.634702682495117, "kl": 0.023673098534345627, "total_loss": 3653.146728515625}, "sample_time_ms": 217313.857, "grad_time_ms": 1484.979, "load_time_ms": 74.976, "update_time_ms": 1009.617}, "timesteps_total": 2400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 2400, "hostname": "cda-server-3", "episode_reward_max": -99.54185984989468}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 661.38379073143, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -146.78393839650298, "iterations_since_restore": 3, "episodes_total": 72, "timestamp": 1756395078, "episode_reward_mean": -127.62901381105137, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_17-31-18", "policy_reward_mean": {}, "time_this_iter_s": 221.49380922317505, "episodes_this_iter": 24, "training_iteration": 3, "time_total_s": 661.38379073143, "info": {"num_steps_sampled": 3600, "num_steps_trained": 3600, "default": {"policy_loss": -0.13941305875778198, "vf_explained_var": 0.11090646684169769, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 0.44999995827674866, "vf_loss": 3644.2900390625, "entropy": 18.60210418701172, "kl": 0.02471771091222763, "total_loss": 3644.161865234375}, "sample_time_ms": 218474.294, "grad_time_ms": 1219.646, "load_time_ms": 50.629, "update_time_ms": 673.919}, "timesteps_total": 3600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 1.0, 
"num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 3600, "hostname": "cda-server-3", "episode_reward_max": -99.54185984989468}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 891.4586873054504, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -146.78393839650298, "iterations_since_restore": 4, "episodes_total": 96, "timestamp": 1756395308, "episode_reward_mean": -125.83527627708632, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_17-35-08", "policy_reward_mean": {}, "time_this_iter_s": 230.07489657402039, "episodes_this_iter": 24, "training_iteration": 4, "time_total_s": 891.4586873054504, "info": {"num_steps_sampled": 4800, "num_steps_trained": 4800, "default": {"policy_loss": -0.12359528988599777, "vf_explained_var": 0.11000010371208191, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 0.675000011920929, "vf_loss": 3012.851806640625, "entropy": 18.575050354003906, "kl": 0.019558193162083626, "total_loss": 3012.7412109375}, "sample_time_ms": 221199.687, "grad_time_ms": 1086.971, "load_time_ms": 38.45, "update_time_ms": 506.159}, "timesteps_total": 4800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 4800, "hostname": "cda-server-3", "episode_reward_max": -98.49905122783261}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 1150.402874469757, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -146.0541023313413, "iterations_since_restore": 5, "episodes_total": 120, "timestamp": 1756395567, "episode_reward_mean": -124.10875304099744, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_17-39-27", "policy_reward_mean": {}, "time_this_iter_s": 258.94418716430664, "episodes_this_iter": 24, "training_iteration": 5, "time_total_s": 1150.402874469757, "info": {"num_steps_sampled": 6000, "num_steps_trained": 6000, "default": {"policy_loss": -0.12141091376543045, "vf_explained_var": 0.05904542654752731, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 0.675000011920929, "vf_loss": 2893.345703125, "entropy": 18.560523986816406, "kl": 0.0196517501026392, "total_loss": 2893.237548828125}, "sample_time_ms": 228606.987, "grad_time_ms": 1009.142, "load_time_ms": 31.135, "update_time_ms": 405.444}, "timesteps_total": 6000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 1.0, 
"num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 6000, "hostname": "cda-server-3", "episode_reward_max": -98.49905122783261}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 1408.9546167850494, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -146.0541023313413, "iterations_since_restore": 6, "episodes_total": 144, "timestamp": 1756395825, "episode_reward_mean": -122.72888846822445, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_17-43-45", "policy_reward_mean": {}, "time_this_iter_s": 258.55174231529236, "episodes_this_iter": 24, "training_iteration": 6, "time_total_s": 1408.9546167850494, "info": {"num_steps_sampled": 7200, "num_steps_trained": 7200, "default": {"policy_loss": -0.13973921537399292, "vf_explained_var": 0.011485014110803604, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 0.675000011920929, "vf_loss": 2471.14990234375, "entropy": 18.532447814941406, "kl": 0.019554639235138893, "total_loss": 2471.023193359375}, "sample_time_ms": 233480.97, "grad_time_ms": 956.137, "load_time_ms": 26.219, "update_time_ms": 338.316}, "timesteps_total": 7200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 7200, "hostname": "cda-server-3", "episode_reward_max": -98.49905122783261}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 1674.9645681381226, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -146.44951359018535, "iterations_since_restore": 7, "episodes_total": 168, "timestamp": 1756396091, "episode_reward_mean": -121.15475903464372, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_17-48-11", "policy_reward_mean": {}, "time_this_iter_s": 266.0099513530731, "episodes_this_iter": 24, "training_iteration": 7, "time_total_s": 1674.9645681381226, "info": {"num_steps_sampled": 8400, "num_steps_trained": 8400, "default": {"policy_loss": -0.13989777863025665, "vf_explained_var": 0.01834733597934246, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 0.675000011920929, "vf_loss": 2349.50146484375, "entropy": 18.50861358642578, "kl": 0.02123822271823883, "total_loss": 2349.376220703125}, "sample_time_ms": 238027.997, "grad_time_ms": 918.38, "load_time_ms": 22.601, "update_time_ms": 290.334}, "timesteps_total": 8400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 8400, "hostname": "cda-server-3", "episode_reward_max": -98.49905122783261}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 1941.3925409317017, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -146.44951359018535, "iterations_since_restore": 8, "episodes_total": 192, "timestamp": 1756396358, "episode_reward_mean": -120.81588605798613, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_17-52-38", "policy_reward_mean": {}, "time_this_iter_s": 266.4279727935791, "episodes_this_iter": 24, "training_iteration": 8, "time_total_s": 1941.3925409317017, "info": {"num_steps_sampled": 9600, "num_steps_trained": 9600, "default": {"policy_loss": -0.12228532880544662, "vf_explained_var": 0.009332027286291122, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 2376.759521484375, "entropy": 18.485597610473633, "kl": 0.017213426530361176, "total_loss": 2376.654541015625}, "sample_time_ms": 241490.303, "grad_time_ms": 890.103, "load_time_ms": 19.985, "update_time_ms": 254.345}, "timesteps_total": 9600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 9600, "hostname": "cda-server-3", "episode_reward_max": -88.09294395093761}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 2161.997076511383, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -146.83873104448023, "iterations_since_restore": 9, "episodes_total": 216, "timestamp": 1756396578, "episode_reward_mean": -119.28984459236621, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_17-56-18", "policy_reward_mean": {}, "time_this_iter_s": 220.6045355796814, "episodes_this_iter": 24, "training_iteration": 9, "time_total_s": 2161.997076511383, "info": {"num_steps_sampled": 10800, "num_steps_trained": 10800, "default": {"policy_loss": -0.13750998675823212, "vf_explained_var": 0.00047000250197015703, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 1895.60546875, "entropy": 18.46615219116211, "kl": 0.017844107002019882, "total_loss": 1895.48583984375}, "sample_time_ms": 239091.05, "grad_time_ms": 868.78, "load_time_ms": 17.938, "update_time_ms": 226.372}, "timesteps_total": 10800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 10800, "hostname": "cda-server-3", "episode_reward_max": -88.09294395093761}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 2368.387995481491, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -146.83873104448023, "iterations_since_restore": 10, "episodes_total": 240, "timestamp": 1756396785, "episode_reward_mean": -118.7602112144562, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_17-59-45", "policy_reward_mean": {}, "time_this_iter_s": 206.39091897010803, "episodes_this_iter": 24, "training_iteration": 10, "time_total_s": 2368.387995481491, "info": {"num_steps_sampled": 12000, "num_steps_trained": 12000, "default": {"policy_loss": -0.12456995993852615, "vf_explained_var": 0.041680652648210526, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 1867.90771484375, "entropy": 18.442859649658203, "kl": 0.018277890980243683, "total_loss": 1867.8016357421875}, "sample_time_ms": 235750.178, "grad_time_ms": 851.815, "load_time_ms": 16.329, "update_time_ms": 203.983}, "timesteps_total": 12000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 12000, "hostname": "cda-server-3", "episode_reward_max": -88.09294395093761}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 2639.416999101639, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -146.83873104448023, "iterations_since_restore": 11, "episodes_total": 264, "timestamp": 1756397056, "episode_reward_mean": -118.20574028935748, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_18-04-16", "policy_reward_mean": {}, "time_this_iter_s": 271.0290036201477, "episodes_this_iter": 24, "training_iteration": 11, "time_total_s": 2639.416999101639, "info": {"num_steps_sampled": 13200, "num_steps_trained": 13200, "default": {"policy_loss": -0.1394842118024826, "vf_explained_var": 0.02399369142949581, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 1760.54541015625, "entropy": 18.41582489013672, "kl": 0.0173909030854702, "total_loss": 1760.423583984375}, "sample_time_ms": 240099.86, "grad_time_ms": 692.911, "load_time_ms": 1.659, "update_time_ms": 2.59}, "timesteps_total": 13200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 1.0, 
"num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 13200, "hostname": "cda-server-3", "episode_reward_max": -88.09294395093761}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 2889.085036754608, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -146.83873104448023, "iterations_since_restore": 12, "episodes_total": 288, "timestamp": 1756397305, "episode_reward_mean": -116.34781812997744, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_18-08-25", "policy_reward_mean": {}, "time_this_iter_s": 249.66803765296936, "episodes_this_iter": 24, "training_iteration": 12, "time_total_s": 2889.085036754608, "info": {"num_steps_sampled": 14400, "num_steps_trained": 14400, "default": {"policy_loss": -0.131776362657547, "vf_explained_var": 0.08143580704927444, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 1517.3621826171875, "entropy": 18.392175674438477, "kl": 0.015726102516055107, "total_loss": 1517.24609375}, "sample_time_ms": 244216.386, "grad_time_ms": 693.713, "load_time_ms": 1.664, "update_time_ms": 2.543}, "timesteps_total": 14400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 14400, "hostname": "cda-server-3", "episode_reward_max": -95.21420483749228}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 3180.254895925522, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -142.98441497447922, "iterations_since_restore": 13, "episodes_total": 312, "timestamp": 1756397596, "episode_reward_mean": -115.62934410428164, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_18-13-16", "policy_reward_mean": {}, "time_this_iter_s": 291.1698591709137, "episodes_this_iter": 24, "training_iteration": 13, "time_total_s": 3180.254895925522, "info": {"num_steps_sampled": 15600, "num_steps_trained": 15600, "default": {"policy_loss": -0.14231985807418823, "vf_explained_var": 0.08726880699396133, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 1588.935791015625, "entropy": 18.384498596191406, "kl": 0.01738560199737549, "total_loss": 1588.8111572265625}, "sample_time_ms": 251184.496, "grad_time_ms": 693.321, "load_time_ms": 1.568, "update_time_ms": 2.557}, "timesteps_total": 15600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 15600, "hostname": "cda-server-3", "episode_reward_max": -83.97588886261303}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 3432.3409848213196, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -143.8383056089926, "iterations_since_restore": 14, "episodes_total": 336, "timestamp": 1756397849, "episode_reward_mean": -115.66906308452896, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_18-17-29", "policy_reward_mean": {}, "time_this_iter_s": 252.08608889579773, "episodes_this_iter": 24, "training_iteration": 14, "time_total_s": 3432.3409848213196, "info": {"num_steps_sampled": 16800, "num_steps_trained": 16800, "default": {"policy_loss": -0.13075391948223114, "vf_explained_var": 0.10596006363630295, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 1400.822509765625, "entropy": 18.35945701599121, "kl": 0.015562936663627625, "total_loss": 1400.7073974609375}, "sample_time_ms": 253385.056, "grad_time_ms": 693.955, "load_time_ms": 1.52, "update_time_ms": 2.523}, "timesteps_total": 16800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 16800, "hostname": "cda-server-3", "episode_reward_max": -83.97588886261303}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 3690.3307423591614, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -143.8383056089926, "iterations_since_restore": 15, "episodes_total": 360, "timestamp": 1756398107, "episode_reward_mean": -114.85649792242968, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_18-21-47", "policy_reward_mean": {}, "time_this_iter_s": 257.9897575378418, "episodes_this_iter": 24, "training_iteration": 15, "time_total_s": 3690.3307423591614, "info": {"num_steps_sampled": 18000, "num_steps_trained": 18000, "default": {"policy_loss": -0.13020434975624084, "vf_explained_var": 0.17911416292190552, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 1270.36865234375, "entropy": 18.33188819885254, "kl": 0.01751522161066532, "total_loss": 1270.256103515625}, "sample_time_ms": 253290.003, "grad_time_ms": 693.672, "load_time_ms": 1.478, "update_time_ms": 2.533}, "timesteps_total": 18000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 18000, "hostname": "cda-server-3", "episode_reward_max": -83.97588886261303}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 3912.750263929367, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -143.8383056089926, "iterations_since_restore": 16, "episodes_total": 384, "timestamp": 1756398329, "episode_reward_mean": -114.61370286216462, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_18-25-29", "policy_reward_mean": {}, "time_this_iter_s": 222.4195215702057, "episodes_this_iter": 24, "training_iteration": 16, "time_total_s": 3912.750263929367, "info": {"num_steps_sampled": 19200, "num_steps_trained": 19200, "default": {"policy_loss": -0.13605083525180817, "vf_explained_var": 0.17312727868556976, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 1203.89111328125, "entropy": 18.3139591217041, "kl": 0.017916101962327957, "total_loss": 1203.7730712890625}, "sample_time_ms": 249676.023, "grad_time_ms": 694.436, "load_time_ms": 1.49, "update_time_ms": 2.497}, "timesteps_total": 19200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 19200, "hostname": "cda-server-3", "episode_reward_max": -83.97588886261303}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 4148.901806116104, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -143.8383056089926, "iterations_since_restore": 17, "episodes_total": 408, "timestamp": 1756398565, "episode_reward_mean": -115.1950941298017, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_18-29-25", "policy_reward_mean": {}, "time_this_iter_s": 236.15154218673706, "episodes_this_iter": 24, "training_iteration": 17, "time_total_s": 4148.901806116104, "info": {"num_steps_sampled": 20400, "num_steps_trained": 20400, "default": {"policy_loss": -0.1500139832496643, "vf_explained_var": 0.20809487998485565, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 1293.311767578125, "entropy": 18.267717361450195, "kl": 0.019330434501171112, "total_loss": 1293.18115234375}, "sample_time_ms": 246689.11, "grad_time_ms": 695.366, "load_time_ms": 1.557, "update_time_ms": 2.53}, "timesteps_total": 20400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 20400, "hostname": "cda-server-3", "episode_reward_max": -98.04220398724607}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 4419.96648812294, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -145.8632685496317, "iterations_since_restore": 18, "episodes_total": 432, "timestamp": 1756398836, "episode_reward_mean": -114.82608095291198, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_18-33-56", "policy_reward_mean": {}, "time_this_iter_s": 271.06468200683594, "episodes_this_iter": 24, "training_iteration": 18, "time_total_s": 4419.96648812294, "info": {"num_steps_sampled": 21600, "num_steps_trained": 21600, "default": {"policy_loss": -0.1369973123073578, "vf_explained_var": 0.21514759957790375, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 1099.616943359375, "entropy": 18.250120162963867, "kl": 0.01694124937057495, "total_loss": 1099.4969482421875}, "sample_time_ms": 247152.753, "grad_time_ms": 695.384, "load_time_ms": 1.549, "update_time_ms": 2.557}, "timesteps_total": 21600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 21600, "hostname": "cda-server-3", "episode_reward_max": -94.07099127019934}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 4666.24494099617, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -145.8632685496317, "iterations_since_restore": 19, "episodes_total": 456, "timestamp": 1756399083, "episode_reward_mean": -113.63070519496996, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_18-38-03", "policy_reward_mean": {}, "time_this_iter_s": 246.27845287322998, "episodes_this_iter": 24, "training_iteration": 19, "time_total_s": 4666.24494099617, "info": {"num_steps_sampled": 22800, "num_steps_trained": 22800, "default": {"policy_loss": -0.13419102132320404, "vf_explained_var": 0.23938888311386108, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 996.186279296875, "entropy": 18.23851776123047, "kl": 0.01877405494451523, "total_loss": 996.071044921875}, "sample_time_ms": 249720.524, "grad_time_ms": 694.96, "load_time_ms": 1.56, "update_time_ms": 2.58}, "timesteps_total": 22800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 1.0, 
"num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 22800, "hostname": "cda-server-3", "episode_reward_max": -92.51656606985235}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 4908.511640548706, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -145.8632685496317, "iterations_since_restore": 20, "episodes_total": 480, "timestamp": 1756399325, "episode_reward_mean": -113.29456813555431, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_18-42-05", "policy_reward_mean": {}, "time_this_iter_s": 242.266699552536, "episodes_this_iter": 24, "training_iteration": 20, "time_total_s": 4908.511640548706, "info": {"num_steps_sampled": 24000, "num_steps_trained": 24000, "default": {"policy_loss": -0.1307111382484436, "vf_explained_var": 0.3056492805480957, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 885.0294189453125, "entropy": 18.227909088134766, "kl": 0.017692746594548225, "total_loss": 884.9165649414062}, "sample_time_ms": 253308.428, "grad_time_ms": 694.623, "load_time_ms": 1.537, "update_time_ms": 2.601}, "timesteps_total": 24000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 24000, "hostname": "cda-server-3", "episode_reward_max": -92.51656606985235}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 5115.891381978989, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -145.8632685496317, "iterations_since_restore": 21, "episodes_total": 504, "timestamp": 1756399532, "episode_reward_mean": -112.84483958739845, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_18-45-32", "policy_reward_mean": {}, "time_this_iter_s": 207.3797414302826, "episodes_this_iter": 24, "training_iteration": 21, "time_total_s": 5115.891381978989, "info": {"num_steps_sampled": 25200, "num_steps_trained": 25200, "default": {"policy_loss": -0.14466862380504608, "vf_explained_var": 0.31529197096824646, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 974.6930541992188, "entropy": 18.17812156677246, "kl": 0.017108624801039696, "total_loss": 974.5657348632812}, "sample_time_ms": 246943.138, "grad_time_ms": 695.042, "load_time_ms": 1.535, "update_time_ms": 2.59}, "timesteps_total": 25200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 25200, "hostname": "cda-server-3", "episode_reward_max": -92.51656606985235}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 5416.202656984329, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -145.31539173741282, "iterations_since_restore": 22, "episodes_total": 528, "timestamp": 1756399832, "episode_reward_mean": -110.93474544247985, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_18-50-32", "policy_reward_mean": {}, "time_this_iter_s": 300.3112750053406, "episodes_this_iter": 24, "training_iteration": 22, "time_total_s": 5416.202656984329, "info": {"num_steps_sampled": 26400, "num_steps_trained": 26400, "default": {"policy_loss": -0.13921838998794556, "vf_explained_var": 0.35455378890037537, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 688.4326171875, "entropy": 18.171295166015625, "kl": 0.016766492277383804, "total_loss": 688.3103637695312}, "sample_time_ms": 252007.87, "grad_time_ms": 694.557, "load_time_ms": 1.591, "update_time_ms": 2.634}, "timesteps_total": 26400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 26400, "hostname": "cda-server-3", "episode_reward_max": -89.64457416011744}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 5694.230200052261, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -144.2697605141167, "iterations_since_restore": 23, "episodes_total": 552, "timestamp": 1756400111, "episode_reward_mean": -110.89697706022662, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_18-55-11", "policy_reward_mean": {}, "time_this_iter_s": 278.02754306793213, "episodes_this_iter": 24, "training_iteration": 23, "time_total_s": 5694.230200052261, "info": {"num_steps_sampled": 27600, "num_steps_trained": 27600, "default": {"policy_loss": -0.13763722777366638, "vf_explained_var": 0.3888266980648041, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 731.5033569335938, "entropy": 18.151676177978516, "kl": 0.01764022745192051, "total_loss": 731.3836059570312}, "sample_time_ms": 250692.213, "grad_time_ms": 695.901, "load_time_ms": 1.648, "update_time_ms": 2.636}, "timesteps_total": 27600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 27600, "hostname": "cda-server-3", "episode_reward_max": -88.93574451772085}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 5979.111471414566, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -143.04836334373098, "iterations_since_restore": 24, "episodes_total": 576, "timestamp": 1756400395, "episode_reward_mean": -111.36213150222491, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_18-59-55", "policy_reward_mean": {}, "time_this_iter_s": 284.8812713623047, "episodes_this_iter": 24, "training_iteration": 24, "time_total_s": 5979.111471414566, "info": {"num_steps_sampled": 28800, "num_steps_trained": 28800, "default": {"policy_loss": -0.1553221344947815, "vf_explained_var": 0.3876085877418518, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 800.6702270507812, "entropy": 18.09413719177246, "kl": 0.018143318593502045, "total_loss": 800.5332641601562}, "sample_time_ms": 253971.919, "grad_time_ms": 695.703, "load_time_ms": 1.672, "update_time_ms": 2.644}, "timesteps_total": 28800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 28800, "hostname": "cda-server-3", "episode_reward_max": -88.93574451772085}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 6190.038968324661, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -142.52618813170668, "iterations_since_restore": 25, "episodes_total": 600, "timestamp": 1756400606, "episode_reward_mean": -110.11096078319713, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_19-03-26", "policy_reward_mean": {}, "time_this_iter_s": 210.92749691009521, "episodes_this_iter": 24, "training_iteration": 25, "time_total_s": 6190.038968324661, "info": {"num_steps_sampled": 30000, "num_steps_trained": 30000, "default": {"policy_loss": -0.13792775571346283, "vf_explained_var": 0.2672511339187622, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 757.8585205078125, "entropy": 18.086666107177734, "kl": 0.017636993899941444, "total_loss": 757.7384643554688}, "sample_time_ms": 249265.378, "grad_time_ms": 695.932, "load_time_ms": 1.705, "update_time_ms": 2.651}, "timesteps_total": 30000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 30000, "hostname": "cda-server-3", "episode_reward_max": -87.96881449444385}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 6434.35960817337, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -142.52618813170668, "iterations_since_restore": 26, "episodes_total": 624, "timestamp": 1756400851, "episode_reward_mean": -108.79258472972552, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_19-07-31", "policy_reward_mean": {}, "time_this_iter_s": 244.3206398487091, "episodes_this_iter": 24, "training_iteration": 26, "time_total_s": 6434.35960817337, "info": {"num_steps_sampled": 31200, "num_steps_trained": 31200, "default": {"policy_loss": -0.13855737447738647, "vf_explained_var": 0.3324964642524719, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 534.9935913085938, "entropy": 18.05270004272461, "kl": 0.015438605099916458, "total_loss": 534.8707275390625}, "sample_time_ms": 251456.213, "grad_time_ms": 695.196, "load_time_ms": 1.709, "update_time_ms": 2.694}, "timesteps_total": 31200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 31200, "hostname": "cda-server-3", "episode_reward_max": -87.96881449444385}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 6672.771792173386, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -141.16678514474953, "iterations_since_restore": 27, "episodes_total": 648, "timestamp": 1756401089, "episode_reward_mean": -106.84075375099816, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_19-11-29", "policy_reward_mean": {}, "time_this_iter_s": 238.41218400001526, "episodes_this_iter": 24, "training_iteration": 27, "time_total_s": 6672.771792173386, "info": {"num_steps_sampled": 32400, "num_steps_trained": 32400, "default": {"policy_loss": -0.14445364475250244, "vf_explained_var": 0.39279234409332275, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 483.4596862792969, "entropy": 18.03901481628418, "kl": 0.016610559076070786, "total_loss": 483.33209228515625}, "sample_time_ms": 251682.44, "grad_time_ms": 695.081, "load_time_ms": 1.711, "update_time_ms": 2.652}, "timesteps_total": 32400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 32400, "hostname": "cda-server-3", "episode_reward_max": -87.96881449444385}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 6921.277290582657, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -140.2992540424679, "iterations_since_restore": 28, "episodes_total": 672, "timestamp": 1756401338, "episode_reward_mean": -104.82396678370964, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_19-15-38", "policy_reward_mean": {}, "time_this_iter_s": 248.50549840927124, "episodes_this_iter": 24, "training_iteration": 28, "time_total_s": 6921.277290582657, "info": {"num_steps_sampled": 33600, "num_steps_trained": 33600, "default": {"policy_loss": -0.1346297711133957, "vf_explained_var": 0.4250890910625458, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 527.1061401367188, "entropy": 18.05461883544922, "kl": 0.016484878957271576, "total_loss": 526.9881591796875}, "sample_time_ms": 249426.827, "grad_time_ms": 694.746, "load_time_ms": 1.73, "update_time_ms": 2.652}, "timesteps_total": 33600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 33600, "hostname": "cda-server-3", "episode_reward_max": -87.96881449444385}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 7166.122593641281, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -143.76604889515352, "iterations_since_restore": 29, "episodes_total": 696, "timestamp": 1756401582, "episode_reward_mean": -103.82999386622753, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_19-19-42", "policy_reward_mean": {}, "time_this_iter_s": 244.84530305862427, "episodes_this_iter": 24, "training_iteration": 29, "time_total_s": 7166.122593641281, "info": {"num_steps_sampled": 34800, "num_steps_trained": 34800, "default": {"policy_loss": -0.1438552737236023, "vf_explained_var": 0.28951722383499146, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 612.649658203125, "entropy": 18.002059936523438, "kl": 0.016011489555239677, "total_loss": 612.5220336914062}, "sample_time_ms": 249283.478, "grad_time_ms": 694.771, "load_time_ms": 1.725, "update_time_ms": 2.634}, "timesteps_total": 34800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 34800, "hostname": "cda-server-3", "episode_reward_max": -88.04797756183808}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 7463.127463102341, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -143.76604889515352, "iterations_since_restore": 30, "episodes_total": 720, "timestamp": 1756401879, "episode_reward_mean": -103.3184289517542, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_19-24-39", "policy_reward_mean": {}, "time_this_iter_s": 297.00486946105957, "episodes_this_iter": 24, "training_iteration": 30, "time_total_s": 7463.127463102341, "info": {"num_steps_sampled": 36000, "num_steps_trained": 36000, "default": {"policy_loss": -0.1339775025844574, "vf_explained_var": 0.4751656949520111, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 410.6561279296875, "entropy": 17.993558883666992, "kl": 0.01672077737748623, "total_loss": 410.5390625}, "sample_time_ms": 254757.672, "grad_time_ms": 694.401, "load_time_ms": 1.736, "update_time_ms": 2.615}, "timesteps_total": 36000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 1.0, 
"num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 36000, "hostname": "cda-server-3", "episode_reward_max": -88.04797756183808}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 7693.591760635376, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -143.76604889515352, "iterations_since_restore": 31, "episodes_total": 744, "timestamp": 1756402110, "episode_reward_mean": -103.7415526760245, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_19-28-30", "policy_reward_mean": {}, "time_this_iter_s": 230.46429753303528, "episodes_this_iter": 24, "training_iteration": 31, "time_total_s": 7693.591760635376, "info": {"num_steps_sampled": 37200, "num_steps_trained": 37200, "default": {"policy_loss": -0.13384594023227692, "vf_explained_var": 0.3274219036102295, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 553.2936401367188, "entropy": 17.990142822265625, "kl": 0.017523042857646942, "total_loss": 553.177490234375}, "sample_time_ms": 257066.525, "grad_time_ms": 693.991, "load_time_ms": 1.726, "update_time_ms": 2.629}, "timesteps_total": 37200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 37200, "hostname": "cda-server-3", "episode_reward_max": -83.96823218772687}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 7949.828924655914, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -143.76604889515352, "iterations_since_restore": 32, "episodes_total": 768, "timestamp": 1756402366, "episode_reward_mean": -102.33990607931862, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_19-32-46", "policy_reward_mean": {}, "time_this_iter_s": 256.23716402053833, "episodes_this_iter": 24, "training_iteration": 32, "time_total_s": 7949.828924655914, "info": {"num_steps_sampled": 38400, "num_steps_trained": 38400, "default": {"policy_loss": -0.13505858182907104, "vf_explained_var": -0.02648478001356125, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 648.9166870117188, "entropy": 17.949193954467773, "kl": 0.015016328543424606, "total_loss": 648.7968139648438}, "sample_time_ms": 252659.187, "grad_time_ms": 693.963, "load_time_ms": 1.673, "update_time_ms": 2.595}, "timesteps_total": 38400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 38400, "hostname": "cda-server-3", "episode_reward_max": -82.06553763454826}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 8189.958149909973, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -141.17269706060515, "iterations_since_restore": 33, "episodes_total": 792, "timestamp": 1756402606, "episode_reward_mean": -103.5668895180602, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_19-36-46", "policy_reward_mean": {}, "time_this_iter_s": 240.12922525405884, "episodes_this_iter": 24, "training_iteration": 33, "time_total_s": 8189.958149909973, "info": {"num_steps_sampled": 39600, "num_steps_trained": 39600, "default": {"policy_loss": -0.13893601298332214, "vf_explained_var": 0.5408138036727905, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 424.24908447265625, "entropy": 17.949119567871094, "kl": 0.017221523448824883, "total_loss": 424.1275634765625}, "sample_time_ms": 248869.829, "grad_time_ms": 693.6, "load_time_ms": 1.623, "update_time_ms": 2.574}, "timesteps_total": 39600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 39600, "hostname": "cda-server-3", "episode_reward_max": -82.06553763454826}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 8483.517776966095, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -143.58513812624415, "iterations_since_restore": 34, "episodes_total": 816, "timestamp": 1756402900, "episode_reward_mean": -104.27082951918139, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_19-41-40", "policy_reward_mean": {}, "time_this_iter_s": 293.5596270561218, "episodes_this_iter": 24, "training_iteration": 34, "time_total_s": 8483.517776966095, "info": {"num_steps_sampled": 40800, "num_steps_trained": 40800, "default": {"policy_loss": -0.1295945793390274, "vf_explained_var": 0.5013567805290222, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 432.3161315917969, "entropy": 17.887298583984375, "kl": 0.01532017532736063, "total_loss": 432.2020263671875}, "sample_time_ms": 249737.248, "grad_time_ms": 694.012, "load_time_ms": 1.621, "update_time_ms": 2.585}, "timesteps_total": 40800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 40800, "hostname": "cda-server-3", "episode_reward_max": -82.06553763454826}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 8723.819400072098, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -148.21402368422488, "iterations_since_restore": 35, "episodes_total": 840, "timestamp": 1756403140, "episode_reward_mean": -103.90413005160178, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_19-45-40", "policy_reward_mean": {}, "time_this_iter_s": 240.3016231060028, "episodes_this_iter": 24, "training_iteration": 35, "time_total_s": 8723.819400072098, "info": {"num_steps_sampled": 42000, "num_steps_trained": 42000, "default": {"policy_loss": -0.11818749457597733, "vf_explained_var": 0.43253830075263977, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 439.0633239746094, "entropy": 17.906269073486328, "kl": 0.014970477670431137, "total_loss": 438.9602966308594}, "sample_time_ms": 252674.46, "grad_time_ms": 694.241, "load_time_ms": 1.623, "update_time_ms": 2.579}, "timesteps_total": 42000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 42000, "hostname": "cda-server-3", "episode_reward_max": -82.06553763454826}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 8991.142573833466, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -148.21402368422488, "iterations_since_restore": 36, "episodes_total": 864, "timestamp": 1756403408, "episode_reward_mean": -105.02438479051513, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_19-50-08", "policy_reward_mean": {}, "time_this_iter_s": 267.3231737613678, "episodes_this_iter": 24, "training_iteration": 36, "time_total_s": 8991.142573833466, "info": {"num_steps_sampled": 43200, "num_steps_trained": 43200, "default": {"policy_loss": -0.13264299929141998, "vf_explained_var": 0.5789927244186401, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 339.433837890625, "entropy": 17.89673614501953, "kl": 0.016630493104457855, "total_loss": 339.31805419921875}, "sample_time_ms": 254973.588, "grad_time_ms": 695.398, "load_time_ms": 1.612, "update_time_ms": 2.552}, "timesteps_total": 43200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 43200, "hostname": "cda-server-3", "episode_reward_max": -86.89631256715614}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 9251.871697187424, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -148.21402368422488, "iterations_since_restore": 37, "episodes_total": 888, "timestamp": 1756403668, "episode_reward_mean": -104.76125330698889, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_19-54-28", "policy_reward_mean": {}, "time_this_iter_s": 260.72912335395813, "episodes_this_iter": 24, "training_iteration": 37, "time_total_s": 9251.871697187424, "info": {"num_steps_sampled": 44400, "num_steps_trained": 44400, "default": {"policy_loss": -0.13514705002307892, "vf_explained_var": 0.5892637968063354, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 338.72479248046875, "entropy": 17.81587028503418, "kl": 0.017263438552618027, "total_loss": 338.6070861816406}, "sample_time_ms": 257205.886, "grad_time_ms": 694.738, "load_time_ms": 1.618, "update_time_ms": 2.592}, "timesteps_total": 44400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 44400, "hostname": "cda-server-3", "episode_reward_max": -86.89631256715614}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 9515.561694860458, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -148.21402368422488, "iterations_since_restore": 38, "episodes_total": 912, "timestamp": 1756403932, "episode_reward_mean": -102.16851522701262, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_19-58-52", "policy_reward_mean": {}, "time_this_iter_s": 263.68999767303467, "episodes_this_iter": 24, "training_iteration": 38, "time_total_s": 9515.561694860458, "info": {"num_steps_sampled": 45600, "num_steps_trained": 45600, "default": {"policy_loss": -0.13061577081680298, "vf_explained_var": 0.6929558515548706, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 212.41424560546875, "entropy": 17.837133407592773, "kl": 0.016778942197561264, "total_loss": 212.30059814453125}, "sample_time_ms": 258723.305, "grad_time_ms": 695.81, "load_time_ms": 1.603, "update_time_ms": 2.598}, "timesteps_total": 45600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 45600, "hostname": "cda-server-3", "episode_reward_max": -85.2176874172706}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 9773.700018405914, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -129.5024379654697, "iterations_since_restore": 39, "episodes_total": 936, "timestamp": 1756404190, "episode_reward_mean": -101.25596506657908, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_20-03-10", "policy_reward_mean": {}, "time_this_iter_s": 258.13832354545593, "episodes_this_iter": 24, "training_iteration": 39, "time_total_s": 9773.700018405914, "info": {"num_steps_sampled": 46800, "num_steps_trained": 46800, "default": {"policy_loss": -0.15072497725486755, "vf_explained_var": 0.7493559122085571, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 181.5271453857422, "entropy": 17.8424129486084, "kl": 0.016762135550379753, "total_loss": 181.3933868408203}, "sample_time_ms": 260052.985, "grad_time_ms": 695.428, "load_time_ms": 1.613, "update_time_ms": 2.596}, "timesteps_total": 46800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 46800, "hostname": "cda-server-3", "episode_reward_max": -85.2176874172706}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 10019.42602467537, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -134.83725819359995, "iterations_since_restore": 40, "episodes_total": 960, "timestamp": 1756404436, "episode_reward_mean": -100.88615860699981, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_20-07-16", "policy_reward_mean": {}, "time_this_iter_s": 245.72600626945496, "episodes_this_iter": 24, "training_iteration": 40, "time_total_s": 10019.42602467537, "info": {"num_steps_sampled": 48000, "num_steps_trained": 48000, "default": {"policy_loss": -0.14604660868644714, "vf_explained_var": 0.7916211485862732, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 139.4286346435547, "entropy": 17.7719783782959, "kl": 0.017864830791950226, "total_loss": 139.30067443847656}, "sample_time_ms": 254924.671, "grad_time_ms": 695.871, "load_time_ms": 1.6, "update_time_ms": 2.606}, "timesteps_total": 48000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 48000, "hostname": "cda-server-3", "episode_reward_max": -85.2176874172706}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 10276.95909500122, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -134.83725819359995, "iterations_since_restore": 41, "episodes_total": 984, "timestamp": 1756404693, "episode_reward_mean": -99.97430550424826, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_20-11-33", "policy_reward_mean": {}, "time_this_iter_s": 257.53307032585144, "episodes_this_iter": 24, "training_iteration": 41, "time_total_s": 10276.95909500122, "info": {"num_steps_sampled": 49200, "num_steps_trained": 49200, "default": {"policy_loss": -0.1547583043575287, "vf_explained_var": 0.7901754379272461, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 130.47581481933594, "entropy": 17.793487548828125, "kl": 0.016820203512907028, "total_loss": 130.3380889892578}, "sample_time_ms": 257630.172, "grad_time_ms": 697.229, "load_time_ms": 1.608, "update_time_ms": 2.586}, "timesteps_total": 49200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 49200, "hostname": "cda-server-3", "episode_reward_max": -81.97190564989381}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 10532.508011579514, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -134.83725819359995, "iterations_since_restore": 42, "episodes_total": 1008, "timestamp": 1756404949, "episode_reward_mean": -100.59323159474148, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_20-15-49", "policy_reward_mean": {}, "time_this_iter_s": 255.54891657829285, "episodes_this_iter": 24, "training_iteration": 42, "time_total_s": 10532.508011579514, "info": {"num_steps_sampled": 50400, "num_steps_trained": 50400, "default": {"policy_loss": -0.1485620141029358, "vf_explained_var": 0.8014824986457825, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 130.30760192871094, "entropy": 17.76481819152832, "kl": 0.016707023605704308, "total_loss": 130.17596435546875}, "sample_time_ms": 257561.944, "grad_time_ms": 696.633, "load_time_ms": 1.607, "update_time_ms": 2.571}, "timesteps_total": 50400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 50400, "hostname": "cda-server-3", "episode_reward_max": -81.97190564989381}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 10790.558824539185, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -144.4774719951156, "iterations_since_restore": 43, "episodes_total": 1032, "timestamp": 1756405207, "episode_reward_mean": -101.62419135575888, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_20-20-07", "policy_reward_mean": {}, "time_this_iter_s": 258.050812959671, "episodes_this_iter": 24, "training_iteration": 43, "time_total_s": 10790.558824539185, "info": {"num_steps_sampled": 51600, "num_steps_trained": 51600, "default": {"policy_loss": -0.1260344386100769, "vf_explained_var": 0.7056383490562439, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 193.17147827148438, "entropy": 17.764888763427734, "kl": 0.01669412851333618, "total_loss": 193.0623321533203}, "sample_time_ms": 259353.801, "grad_time_ms": 696.863, "load_time_ms": 1.658, "update_time_ms": 2.593}, "timesteps_total": 51600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 51600, "hostname": "cda-server-3", "episode_reward_max": -81.97190564989381}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 11004.175188064575, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -144.4774719951156, "iterations_since_restore": 44, "episodes_total": 1056, "timestamp": 1756405421, "episode_reward_mean": -101.60767664423524, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_20-23-41", "policy_reward_mean": {}, "time_this_iter_s": 213.61636352539062, "episodes_this_iter": 24, "training_iteration": 44, "time_total_s": 11004.175188064575, "info": {"num_steps_sampled": 52800, "num_steps_trained": 52800, "default": {"policy_loss": -0.148858442902565, "vf_explained_var": 0.7899549007415771, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 127.62850952148438, "entropy": 17.72422981262207, "kl": 0.017617570236325264, "total_loss": 127.49748992919922}, "sample_time_ms": 251359.431, "grad_time_ms": 696.923, "load_time_ms": 1.654, "update_time_ms": 2.562}, "timesteps_total": 52800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 52800, "hostname": "cda-server-3", "episode_reward_max": -81.97190564989381}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 11276.314458370209, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -144.4774719951156, "iterations_since_restore": 45, "episodes_total": 1080, "timestamp": 1756405693, "episode_reward_mean": -101.40711573503677, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_20-28-13", "policy_reward_mean": {}, "time_this_iter_s": 272.13927030563354, "episodes_this_iter": 24, "training_iteration": 45, "time_total_s": 11276.314458370209, "info": {"num_steps_sampled": 54000, "num_steps_trained": 54000, "default": {"policy_loss": -0.13800571858882904, "vf_explained_var": 0.8040595054626465, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 110.159912109375, "entropy": 17.731359481811523, "kl": 0.017722077667713165, "total_loss": 110.03984832763672}, "sample_time_ms": 254543.995, "grad_time_ms": 696.108, "load_time_ms": 1.667, "update_time_ms": 2.57}, "timesteps_total": 54000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 54000, "hostname": "cda-server-3", "episode_reward_max": -80.96407251778136}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 11516.05266880989, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -144.4774719951156, "iterations_since_restore": 46, "episodes_total": 1104, "timestamp": 1756405933, "episode_reward_mean": -101.97060669596017, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_20-32-13", "policy_reward_mean": {}, "time_this_iter_s": 239.738210439682, "episodes_this_iter": 24, "training_iteration": 46, "time_total_s": 11516.05266880989, "info": {"num_steps_sampled": 55200, "num_steps_trained": 55200, "default": {"policy_loss": -0.15162310004234314, "vf_explained_var": 0.8339415788650513, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 126.8819580078125, "entropy": 17.67001724243164, "kl": 0.018563542515039444, "total_loss": 126.74913024902344}, "sample_time_ms": 251785.723, "grad_time_ms": 695.899, "load_time_ms": 1.643, "update_time_ms": 2.599}, "timesteps_total": 55200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 55200, "hostname": "cda-server-3", "episode_reward_max": -72.23740427864698}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 11774.868111371994, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -135.64386258019744, "iterations_since_restore": 47, "episodes_total": 1128, "timestamp": 1756406191, "episode_reward_mean": -100.08848784529565, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_20-36-31", "policy_reward_mean": {}, "time_this_iter_s": 258.81544256210327, "episodes_this_iter": 24, "training_iteration": 47, "time_total_s": 11774.868111371994, "info": {"num_steps_sampled": 56400, "num_steps_trained": 56400, "default": {"policy_loss": -0.14888200163841248, "vf_explained_var": 0.8016077280044556, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 115.84388732910156, "entropy": 17.718650817871094, "kl": 0.017319880425930023, "total_loss": 115.7125244140625}, "sample_time_ms": 251593.678, "grad_time_ms": 696.586, "load_time_ms": 1.642, "update_time_ms": 2.592}, "timesteps_total": 56400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 56400, "hostname": "cda-server-3", "episode_reward_max": -69.29839344771064}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 12001.616872549057, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -135.64386258019744, "iterations_since_restore": 48, "episodes_total": 1152, "timestamp": 1756406418, "episode_reward_mean": -99.68269586736893, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_20-40-18", "policy_reward_mean": {}, "time_this_iter_s": 226.748761177063, "episodes_this_iter": 24, "training_iteration": 48, "time_total_s": 12001.616872549057, "info": {"num_steps_sampled": 57600, "num_steps_trained": 57600, "default": {"policy_loss": -0.15360401570796967, "vf_explained_var": 0.8236192464828491, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 103.33358001708984, "entropy": 17.707372665405273, "kl": 0.019352156668901443, "total_loss": 103.19956970214844}, "sample_time_ms": 247900.738, "grad_time_ms": 695.412, "load_time_ms": 1.626, "update_time_ms": 2.588}, "timesteps_total": 57600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 57600, "hostname": "cda-server-3", "episode_reward_max": -69.29839344771064}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 12236.425989627838, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -135.64386258019744, "iterations_since_restore": 49, "episodes_total": 1176, "timestamp": 1756406653, "episode_reward_mean": -98.22463176781638, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_20-44-13", "policy_reward_mean": {}, "time_this_iter_s": 234.80911707878113, "episodes_this_iter": 24, "training_iteration": 49, "time_total_s": 12236.425989627838, "info": {"num_steps_sampled": 58800, "num_steps_trained": 58800, "default": {"policy_loss": -0.1393449306488037, "vf_explained_var": 0.8465521335601807, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 76.96279907226562, "entropy": 17.65727996826172, "kl": 0.017094898968935013, "total_loss": 76.84076690673828}, "sample_time_ms": 245567.736, "grad_time_ms": 695.671, "load_time_ms": 1.564, "update_time_ms": 2.572}, "timesteps_total": 58800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 58800, "hostname": "cda-server-3", "episode_reward_max": -69.29839344771064}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 12468.61930012703, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -135.64386258019744, "iterations_since_restore": 50, "episodes_total": 1200, "timestamp": 1756406885, "episode_reward_mean": -96.12076030956199, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_20-48-05", "policy_reward_mean": {}, "time_this_iter_s": 232.19331049919128, "episodes_this_iter": 24, "training_iteration": 50, "time_total_s": 12468.61930012703, "info": {"num_steps_sampled": 60000, "num_steps_trained": 60000, "default": {"policy_loss": -0.14331084489822388, "vf_explained_var": 0.9255598783493042, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 37.3577995300293, "entropy": 17.61494255065918, "kl": 0.018930919468402863, "total_loss": 37.2336540222168}, "sample_time_ms": 244214.895, "grad_time_ms": 695.213, "load_time_ms": 1.565, "update_time_ms": 2.591}, "timesteps_total": 60000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 60000, "hostname": "cda-server-3", "episode_reward_max": -69.29839344771064}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 12709.341829061508, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -132.3752722797274, "iterations_since_restore": 51, "episodes_total": 1224, "timestamp": 1756407126, "episode_reward_mean": -95.85087433939978, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_20-52-06", "policy_reward_mean": {}, "time_this_iter_s": 240.72252893447876, "episodes_this_iter": 24, "training_iteration": 51, "time_total_s": 12709.341829061508, "info": {"num_steps_sampled": 61200, "num_steps_trained": 61200, "default": {"policy_loss": -0.14188522100448608, "vf_explained_var": 0.8768813610076904, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 71.55755615234375, "entropy": 17.609588623046875, "kl": 0.018651418387889862, "total_loss": 71.43455505371094}, "sample_time_ms": 242534.376, "grad_time_ms": 694.668, "load_time_ms": 1.564, "update_time_ms": 2.595}, "timesteps_total": 61200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 61200, "hostname": "cda-server-3", "episode_reward_max": -74.95478802659025}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 12948.8257188797, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -132.3752722797274, "iterations_since_restore": 52, "episodes_total": 1248, "timestamp": 1756407365, "episode_reward_mean": -94.87060266743552, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_20-56-05", "policy_reward_mean": {}, "time_this_iter_s": 239.48388981819153, "episodes_this_iter": 24, "training_iteration": 52, "time_total_s": 12948.8257188797, "info": {"num_steps_sampled": 62400, "num_steps_trained": 62400, "default": {"policy_loss": -0.1256731152534485, "vf_explained_var": 0.8760194182395935, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 63.51656723022461, "entropy": 17.582059860229492, "kl": 0.01717858947813511, "total_loss": 63.40829086303711}, "sample_time_ms": 240927.317, "grad_time_ms": 695.266, "load_time_ms": 1.531, "update_time_ms": 2.61}, "timesteps_total": 62400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 1.0, 
"num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 62400, "hostname": "cda-server-3", "episode_reward_max": -74.95383250565217}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 13182.688966751099, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -132.3752722797274, "iterations_since_restore": 53, "episodes_total": 1272, "timestamp": 1756407599, "episode_reward_mean": -95.34690342570403, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_20-59-59", "policy_reward_mean": {}, "time_this_iter_s": 233.86324787139893, "episodes_this_iter": 24, "training_iteration": 53, "time_total_s": 13182.688966751099, "info": {"num_steps_sampled": 63600, "num_steps_trained": 63600, "default": {"policy_loss": -0.14219270646572113, "vf_explained_var": 0.9002120494842529, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 50.38748550415039, "entropy": 17.57541847229004, "kl": 0.016468307003378868, "total_loss": 50.261962890625}, "sample_time_ms": 238508.049, "grad_time_ms": 695.711, "load_time_ms": 1.556, "update_time_ms": 2.613}, "timesteps_total": 63600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 63600, "hostname": "cda-server-3", "episode_reward_max": -72.95456854464868}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 13417.420874357224, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -132.3752722797274, "iterations_since_restore": 54, "episodes_total": 1296, "timestamp": 1756407834, "episode_reward_mean": -95.8701949185, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_21-03-54", "policy_reward_mean": {}, "time_this_iter_s": 234.73190760612488, "episodes_this_iter": 24, "training_iteration": 54, "time_total_s": 13417.420874357224, "info": {"num_steps_sampled": 64800, "num_steps_trained": 64800, "default": {"policy_loss": -0.1341078281402588, "vf_explained_var": 0.8943191170692444, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 55.06442642211914, "entropy": 17.553176879882812, "kl": 0.016393329948186874, "total_loss": 54.94691467285156}, "sample_time_ms": 240620.357, "grad_time_ms": 694.996, "load_time_ms": 1.535, "update_time_ms": 2.629}, "timesteps_total": 64800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 64800, "hostname": "cda-server-3", "episode_reward_max": -72.95456854464868}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 13653.380255937576, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -118.67192241336538, "iterations_since_restore": 55, "episodes_total": 1320, "timestamp": 1756408070, "episode_reward_mean": -95.79111107637159, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_21-07-50", "policy_reward_mean": {}, "time_this_iter_s": 235.95938158035278, "episodes_this_iter": 24, "training_iteration": 55, "time_total_s": 13653.380255937576, "info": {"num_steps_sampled": 66000, "num_steps_trained": 66000, "default": {"policy_loss": -0.1451943963766098, "vf_explained_var": 0.8970387578010559, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 59.29791259765625, "entropy": 17.52239227294922, "kl": 0.01855158805847168, "total_loss": 59.17150115966797}, "sample_time_ms": 237001.363, "grad_time_ms": 696.064, "load_time_ms": 1.505, "update_time_ms": 2.594}, "timesteps_total": 66000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 66000, "hostname": "cda-server-3", "episode_reward_max": -72.95456854464868}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 13936.009518384933, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -118.67192241336538, "iterations_since_restore": 56, "episodes_total": 1344, "timestamp": 1756408353, "episode_reward_mean": -95.66737848522412, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_21-12-33", "policy_reward_mean": {}, "time_this_iter_s": 282.6292624473572, "episodes_this_iter": 24, "training_iteration": 56, "time_total_s": 13936.009518384933, "info": {"num_steps_sampled": 67200, "num_steps_trained": 67200, "default": {"policy_loss": -0.13796259462833405, "vf_explained_var": 0.8547341227531433, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 80.74215698242188, "entropy": 17.47957992553711, "kl": 0.016449345275759697, "total_loss": 80.62085723876953}, "sample_time_ms": 241290.762, "grad_time_ms": 695.682, "load_time_ms": 1.535, "update_time_ms": 2.581}, "timesteps_total": 67200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 67200, "hostname": "cda-server-3", "episode_reward_max": -72.95456854464868}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 14193.073428630829, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -120.17837555190016, "iterations_since_restore": 57, "episodes_total": 1368, "timestamp": 1756408610, "episode_reward_mean": -96.01223746240512, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_21-16-50", "policy_reward_mean": {}, "time_this_iter_s": 257.0639102458954, "episodes_this_iter": 24, "training_iteration": 57, "time_total_s": 14193.073428630829, "info": {"num_steps_sampled": 68400, "num_steps_trained": 68400, "default": {"policy_loss": -0.13664299249649048, "vf_explained_var": 0.8982321619987488, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 57.72923278808594, "entropy": 17.488954544067383, "kl": 0.01833203062415123, "total_loss": 57.61115264892578}, "sample_time_ms": 241115.722, "grad_time_ms": 695.653, "load_time_ms": 1.486, "update_time_ms": 2.574}, "timesteps_total": 68400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 68400, "hostname": "cda-server-3", "episode_reward_max": -77.63105009300338}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 14403.944508075714, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -120.17837555190016, "iterations_since_restore": 58, "episodes_total": 1392, "timestamp": 1756408821, "episode_reward_mean": -95.81383784659482, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_21-20-21", "policy_reward_mean": {}, "time_this_iter_s": 210.87107944488525, "episodes_this_iter": 24, "training_iteration": 58, "time_total_s": 14403.944508075714, "info": {"num_steps_sampled": 69600, "num_steps_trained": 69600, "default": {"policy_loss": -0.1360078603029251, "vf_explained_var": 0.9107392430305481, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 46.64500045776367, "entropy": 17.508567810058594, "kl": 0.016890546306967735, "total_loss": 46.52609634399414}, "sample_time_ms": 239527.122, "grad_time_ms": 696.5, "load_time_ms": 1.493, "update_time_ms": 2.548}, "timesteps_total": 69600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 69600, "hostname": "cda-server-3", "episode_reward_max": -77.63105009300338}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 14628.449810504913, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -134.19846850030785, "iterations_since_restore": 59, "episodes_total": 1416, "timestamp": 1756409045, "episode_reward_mean": -96.00413438464108, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_21-24-05", "policy_reward_mean": {}, "time_this_iter_s": 224.50530242919922, "episodes_this_iter": 24, "training_iteration": 59, "time_total_s": 14628.449810504913, "info": {"num_steps_sampled": 70800, "num_steps_trained": 70800, "default": {"policy_loss": -0.13526791334152222, "vf_explained_var": 0.8764873743057251, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 70.6440658569336, "entropy": 17.406675338745117, "kl": 0.015590902417898178, "total_loss": 70.52458190917969}, "sample_time_ms": 238496.96, "grad_time_ms": 696.199, "load_time_ms": 1.53, "update_time_ms": 2.565}, "timesteps_total": 70800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 70800, "hostname": "cda-server-3", "episode_reward_max": -68.80640733491872}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 14892.913598299026, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -134.19846850030785, "iterations_since_restore": 60, "episodes_total": 1440, "timestamp": 1756409310, "episode_reward_mean": -95.28690626054942, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_21-28-30", "policy_reward_mean": {}, "time_this_iter_s": 264.46378779411316, "episodes_this_iter": 24, "training_iteration": 60, "time_total_s": 14892.913598299026, "info": {"num_steps_sampled": 72000, "num_steps_trained": 72000, "default": {"policy_loss": -0.1450229287147522, "vf_explained_var": 0.8648273944854736, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 73.43704223632812, "entropy": 17.41325569152832, "kl": 0.017410503700375557, "total_loss": 73.30965423583984}, "sample_time_ms": 241723.871, "grad_time_ms": 696.371, "load_time_ms": 1.535, "update_time_ms": 2.538}, "timesteps_total": 72000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 72000, "hostname": "cda-server-3", "episode_reward_max": -65.12883998949023}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 15171.576050519943, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -140.69425675750867, "iterations_since_restore": 61, "episodes_total": 1464, "timestamp": 1756409588, "episode_reward_mean": -95.57552015729631, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_21-33-08", "policy_reward_mean": {}, "time_this_iter_s": 278.66245222091675, "episodes_this_iter": 24, "training_iteration": 61, "time_total_s": 15171.576050519943, "info": {"num_steps_sampled": 73200, "num_steps_trained": 73200, "default": {"policy_loss": -0.16056376695632935, "vf_explained_var": 0.8698188066482544, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 78.02471923828125, "entropy": 17.357572555541992, "kl": 0.016973795369267464, "total_loss": 77.88133239746094}, "sample_time_ms": 245517.672, "grad_time_ms": 696.497, "load_time_ms": 1.534, "update_time_ms": 2.557}, "timesteps_total": 73200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 73200, "hostname": "cda-server-3", "episode_reward_max": -65.12883998949023}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 15447.08240532875, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -140.69425675750867, "iterations_since_restore": 62, "episodes_total": 1488, "timestamp": 1756409864, "episode_reward_mean": -93.79442351704975, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_21-37-44", "policy_reward_mean": {}, "time_this_iter_s": 275.5063548088074, "episodes_this_iter": 24, "training_iteration": 62, "time_total_s": 15447.08240532875, "info": {"num_steps_sampled": 74400, "num_steps_trained": 74400, "default": {"policy_loss": -0.1545441895723343, "vf_explained_var": 0.8350050449371338, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 69.55598449707031, "entropy": 17.437522888183594, "kl": 0.017163407057523727, "total_loss": 69.41881561279297}, "sample_time_ms": 249118.09, "grad_time_ms": 698.277, "load_time_ms": 1.554, "update_time_ms": 2.574}, "timesteps_total": 74400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 74400, "hostname": "cda-server-3", "episode_reward_max": -65.12883998949023}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 15697.26745390892, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -140.69425675750867, "iterations_since_restore": 63, "episodes_total": 1512, "timestamp": 1756410114, "episode_reward_mean": -92.36666563102112, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_21-41-54", "policy_reward_mean": {}, "time_this_iter_s": 250.18504858016968, "episodes_this_iter": 24, "training_iteration": 63, "time_total_s": 15697.26745390892, "info": {"num_steps_sampled": 75600, "num_steps_trained": 75600, "default": {"policy_loss": -0.14007754623889923, "vf_explained_var": 0.8591345548629761, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 73.84480285644531, "entropy": 17.366647720336914, "kl": 0.016919545829296112, "total_loss": 73.72185516357422}, "sample_time_ms": 250750.329, "grad_time_ms": 698.257, "load_time_ms": 1.524, "update_time_ms": 2.579}, "timesteps_total": 75600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 75600, "hostname": "cda-server-3", "episode_reward_max": -62.945926316347276}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 15952.925563812256, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -140.69425675750867, "iterations_since_restore": 64, "episodes_total": 1536, "timestamp": 1756410370, "episode_reward_mean": -91.95464920578206, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_21-46-10", "policy_reward_mean": {}, "time_this_iter_s": 255.65810990333557, "episodes_this_iter": 24, "training_iteration": 64, "time_total_s": 15952.925563812256, "info": {"num_steps_sampled": 76800, "num_steps_trained": 76800, "default": {"policy_loss": -0.14222145080566406, "vf_explained_var": 0.8870275616645813, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 54.59144592285156, "entropy": 17.29293441772461, "kl": 0.016513163223862648, "total_loss": 54.4659423828125}, "sample_time_ms": 252842.238, "grad_time_ms": 698.963, "load_time_ms": 1.542, "update_time_ms": 2.566}, "timesteps_total": 76800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 76800, "hostname": "cda-server-3", "episode_reward_max": -62.945926316347276}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 16182.57912182808, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -123.09070788121295, "iterations_since_restore": 65, "episodes_total": 1560, "timestamp": 1756410599, "episode_reward_mean": -89.69694503502396, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_21-49-59", "policy_reward_mean": {}, "time_this_iter_s": 229.65355801582336, "episodes_this_iter": 24, "training_iteration": 65, "time_total_s": 16182.57912182808, "info": {"num_steps_sampled": 78000, "num_steps_trained": 78000, "default": {"policy_loss": -0.13650605082511902, "vf_explained_var": 0.8334833979606628, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 79.48489379882812, "entropy": 17.300722122192383, "kl": 0.016983311623334885, "total_loss": 79.36558532714844}, "sample_time_ms": 252212.596, "grad_time_ms": 698.018, "load_time_ms": 1.542, "update_time_ms": 2.56}, "timesteps_total": 78000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 78000, "hostname": "cda-server-3", "episode_reward_max": -62.945926316347276}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 16448.787168741226, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -123.09070788121295, "iterations_since_restore": 66, "episodes_total": 1584, "timestamp": 1756410865, "episode_reward_mean": -88.2820038471582, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_21-54-25", "policy_reward_mean": {}, "time_this_iter_s": 266.208046913147, "episodes_this_iter": 24, "training_iteration": 66, "time_total_s": 16448.787168741226, "info": {"num_steps_sampled": 79200, "num_steps_trained": 79200, "default": {"policy_loss": -0.13701409101486206, "vf_explained_var": 0.8851307034492493, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 49.43205261230469, "entropy": 17.186141967773438, "kl": 0.01660430245101452, "total_loss": 49.311851501464844}, "sample_time_ms": 250570.621, "grad_time_ms": 697.943, "load_time_ms": 1.536, "update_time_ms": 2.553}, "timesteps_total": 79200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 79200, "hostname": "cda-server-3", "episode_reward_max": -62.945926316347276}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 16695.300344944, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -112.2154760288806, "iterations_since_restore": 67, "episodes_total": 1608, "timestamp": 1756411112, "episode_reward_mean": -87.30647296079995, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_21-58-32", "policy_reward_mean": {}, "time_this_iter_s": 246.51317620277405, "episodes_this_iter": 24, "training_iteration": 67, "time_total_s": 16695.300344944, "info": {"num_steps_sampled": 80400, "num_steps_trained": 80400, "default": {"policy_loss": -0.14472953975200653, "vf_explained_var": 0.8869979381561279, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 50.39473342895508, "entropy": 17.204143524169922, "kl": 0.01810036227107048, "total_loss": 50.268333435058594}, "sample_time_ms": 249516.096, "grad_time_ms": 697.373, "load_time_ms": 1.53, "update_time_ms": 2.542}, "timesteps_total": 80400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 1.0, 
"num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 80400, "hostname": "cda-server-3", "episode_reward_max": -60.97078129308109}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 16937.570281505585, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -104.9671725722534, "iterations_since_restore": 68, "episodes_total": 1632, "timestamp": 1756411354, "episode_reward_mean": -85.12118934184193, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_22-02-34", "policy_reward_mean": {}, "time_this_iter_s": 242.26993656158447, "episodes_this_iter": 24, "training_iteration": 68, "time_total_s": 16937.570281505585, "info": {"num_steps_sampled": 81600, "num_steps_trained": 81600, "default": {"policy_loss": -0.1376802623271942, "vf_explained_var": 0.8597739338874817, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 57.41520309448242, "entropy": 17.178375244140625, "kl": 0.0169665589928627, "total_loss": 57.29470443725586}, "sample_time_ms": 252656.029, "grad_time_ms": 697.251, "load_time_ms": 1.562, "update_time_ms": 2.569}, "timesteps_total": 81600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 81600, "hostname": "cda-server-3", "episode_reward_max": -60.95099421013692}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 17204.35671567917, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -108.20417373274827, "iterations_since_restore": 69, "episodes_total": 1656, "timestamp": 1756411621, "episode_reward_mean": -84.75867703744163, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_22-07-01", "policy_reward_mean": {}, "time_this_iter_s": 266.786434173584, "episodes_this_iter": 24, "training_iteration": 69, "time_total_s": 17204.35671567917, "info": {"num_steps_sampled": 82800, "num_steps_trained": 82800, "default": {"policy_loss": -0.13954412937164307, "vf_explained_var": 0.8834936618804932, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 52.70014190673828, "entropy": 17.090085983276367, "kl": 0.01663246750831604, "total_loss": 52.57743835449219}, "sample_time_ms": 256884.429, "grad_time_ms": 697.089, "load_time_ms": 1.488, "update_time_ms": 2.548}, "timesteps_total": 82800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 82800, "hostname": "cda-server-3", "episode_reward_max": -60.95099421013692}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 17439.835283517838, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -108.20417373274827, "iterations_since_restore": 70, "episodes_total": 1680, "timestamp": 1756411857, "episode_reward_mean": -84.66569654180248, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_22-10-57", "policy_reward_mean": {}, "time_this_iter_s": 235.47856783866882, "episodes_this_iter": 24, "training_iteration": 70, "time_total_s": 17439.835283517838, "info": {"num_steps_sampled": 84000, "num_steps_trained": 84000, "default": {"policy_loss": -0.13918136060237885, "vf_explained_var": 0.8703316450119019, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 57.281005859375, "entropy": 17.030242919921875, "kl": 0.015691058710217476, "total_loss": 57.15771484375}, "sample_time_ms": 253984.995, "grad_time_ms": 697.978, "load_time_ms": 1.486, "update_time_ms": 2.563}, "timesteps_total": 84000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 84000, "hostname": "cda-server-3", "episode_reward_max": -60.95099421013692}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 17697.609385490417, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -131.91481230341097, "iterations_since_restore": 71, "episodes_total": 1704, "timestamp": 1756412114, "episode_reward_mean": -84.41687713566581, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_22-15-14", "policy_reward_mean": {}, "time_this_iter_s": 257.77410197257996, "episodes_this_iter": 24, "training_iteration": 71, "time_total_s": 17697.609385490417, "info": {"num_steps_sampled": 85200, "num_steps_trained": 85200, "default": {"policy_loss": -0.14263315498828888, "vf_explained_var": 0.8583628535270691, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 61.98405075073242, "entropy": 17.083913803100586, "kl": 0.017013147473335266, "total_loss": 61.85863494873047}, "sample_time_ms": 251896.396, "grad_time_ms": 697.808, "load_time_ms": 1.491, "update_time_ms": 2.553}, "timesteps_total": 85200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 85200, "hostname": "cda-server-3", "episode_reward_max": -60.95099421013692}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 17942.44306564331, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -131.91481230341097, "iterations_since_restore": 72, "episodes_total": 1728, "timestamp": 1756412359, "episode_reward_mean": -83.9152839901135, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_22-19-19", "policy_reward_mean": {}, "time_this_iter_s": 244.83368015289307, "episodes_this_iter": 24, "training_iteration": 72, "time_total_s": 17942.44306564331, "info": {"num_steps_sampled": 86400, "num_steps_trained": 86400, "default": {"policy_loss": -0.1403069943189621, "vf_explained_var": 0.8636730909347534, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 56.658843994140625, "entropy": 17.043167114257812, "kl": 0.01649215817451477, "total_loss": 56.53523635864258}, "sample_time_ms": 248830.982, "grad_time_ms": 696.081, "load_time_ms": 1.428, "update_time_ms": 2.547}, "timesteps_total": 86400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 86400, "hostname": "cda-server-3", "episode_reward_max": -61.90480025645444}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 18184.41885781288, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -131.91481230341097, "iterations_since_restore": 73, "episodes_total": 1752, "timestamp": 1756412601, "episode_reward_mean": -83.86643944815816, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_22-23-21", "policy_reward_mean": {}, "time_this_iter_s": 241.97579216957092, "episodes_this_iter": 24, "training_iteration": 73, "time_total_s": 18184.41885781288, "info": {"num_steps_sampled": 87600, "num_steps_trained": 87600, "default": {"policy_loss": -0.14834047853946686, "vf_explained_var": 0.8909367322921753, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 44.6313362121582, "entropy": 16.992233276367188, "kl": 0.017693255096673965, "total_loss": 44.50090789794922}, "sample_time_ms": 248009.755, "grad_time_ms": 696.471, "load_time_ms": 1.402, "update_time_ms": 2.523}, "timesteps_total": 87600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 87600, "hostname": "cda-server-3", "episode_reward_max": -60.02371123132278}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 18448.48611831665, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -131.91481230341097, "iterations_since_restore": 74, "episodes_total": 1776, "timestamp": 1756412865, "episode_reward_mean": -83.91763020685005, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_22-27-45", "policy_reward_mean": {}, "time_this_iter_s": 264.0672605037689, "episodes_this_iter": 24, "training_iteration": 74, "time_total_s": 18448.48611831665, "info": {"num_steps_sampled": 88800, "num_steps_trained": 88800, "default": {"policy_loss": -0.13635270297527313, "vf_explained_var": 0.8877306580543518, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 50.658626556396484, "entropy": 16.9888916015625, "kl": 0.01725666970014572, "total_loss": 50.53974533081055}, "sample_time_ms": 248850.608, "grad_time_ms": 696.485, "load_time_ms": 1.416, "update_time_ms": 2.517}, "timesteps_total": 88800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 88800, "hostname": "cda-server-3", "episode_reward_max": -59.006022251229936}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 18711.91195678711, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -114.60479451289457, "iterations_since_restore": 75, "episodes_total": 1800, "timestamp": 1756413129, "episode_reward_mean": -83.25586565324383, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_22-32-09", "policy_reward_mean": {}, "time_this_iter_s": 263.425838470459, "episodes_this_iter": 24, "training_iteration": 75, "time_total_s": 18711.91195678711, "info": {"num_steps_sampled": 90000, "num_steps_trained": 90000, "default": {"policy_loss": -0.13750618696212769, "vf_explained_var": 0.8878066539764404, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 45.646209716796875, "entropy": 16.916126251220703, "kl": 0.01593046449124813, "total_loss": 45.52482986450195}, "sample_time_ms": 252226.95, "grad_time_ms": 697.303, "load_time_ms": 1.438, "update_time_ms": 2.541}, "timesteps_total": 90000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 90000, "hostname": "cda-server-3", "episode_reward_max": -54.96061487194269}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 18935.775758504868, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -114.60479451289457, "iterations_since_restore": 76, "episodes_total": 1824, "timestamp": 1756413353, "episode_reward_mean": -82.72014624301787, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_22-35-53", "policy_reward_mean": {}, "time_this_iter_s": 223.86380171775818, "episodes_this_iter": 24, "training_iteration": 76, "time_total_s": 18935.775758504868, "info": {"num_steps_sampled": 91200, "num_steps_trained": 91200, "default": {"policy_loss": -0.1411411315202713, "vf_explained_var": 0.90963214635849, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 31.231788635253906, "entropy": 16.93149185180664, "kl": 0.017530765384435654, "total_loss": 31.108394622802734}, "sample_time_ms": 247992.191, "grad_time_ms": 697.658, "load_time_ms": 1.458, "update_time_ms": 2.527}, "timesteps_total": 91200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 91200, "hostname": "cda-server-3", "episode_reward_max": -54.96061487194269}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 19178.031841754913, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -108.94272480428417, "iterations_since_restore": 77, "episodes_total": 1848, "timestamp": 1756413595, "episode_reward_mean": -80.92310797396698, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_22-39-55", "policy_reward_mean": {}, "time_this_iter_s": 242.25608325004578, "episodes_this_iter": 24, "training_iteration": 77, "time_total_s": 19178.031841754913, "info": {"num_steps_sampled": 92400, "num_steps_trained": 92400, "default": {"policy_loss": -0.14614935219287872, "vf_explained_var": 0.8987939357757568, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 35.67803955078125, "entropy": 16.90253448486328, "kl": 0.01667719893157482, "total_loss": 35.54877471923828}, "sample_time_ms": 247566.57, "grad_time_ms": 697.475, "load_time_ms": 1.53, "update_time_ms": 2.536}, "timesteps_total": 92400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 92400, "hostname": "cda-server-3", "episode_reward_max": -54.96061487194269}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 19408.83300757408, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -112.77217478784608, "iterations_since_restore": 78, "episodes_total": 1872, "timestamp": 1756413826, "episode_reward_mean": -78.66633419654116, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_22-43-46", "policy_reward_mean": {}, "time_this_iter_s": 230.8011658191681, "episodes_this_iter": 24, "training_iteration": 78, "time_total_s": 19408.83300757408, "info": {"num_steps_sampled": 93600, "num_steps_trained": 93600, "default": {"policy_loss": -0.14167816936969757, "vf_explained_var": 0.8663337826728821, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 49.84602355957031, "entropy": 16.82881736755371, "kl": 0.016407020390033722, "total_loss": 49.72095489501953}, "sample_time_ms": 246420.316, "grad_time_ms": 696.87, "load_time_ms": 1.52, "update_time_ms": 2.508}, "timesteps_total": 93600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 93600, "hostname": "cda-server-3", "episode_reward_max": -54.96061487194269}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 19628.77525162697, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -112.77217478784608, "iterations_since_restore": 79, "episodes_total": 1896, "timestamp": 1756414046, "episode_reward_mean": -76.82071840459376, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_22-47-26", "policy_reward_mean": {}, "time_this_iter_s": 219.94224405288696, "episodes_this_iter": 24, "training_iteration": 79, "time_total_s": 19628.77525162697, "info": {"num_steps_sampled": 94800, "num_steps_trained": 94800, "default": {"policy_loss": -0.13872185349464417, "vf_explained_var": 0.8788679838180542, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 42.41904067993164, "entropy": 16.846330642700195, "kl": 0.017035197466611862, "total_loss": 42.29756164550781}, "sample_time_ms": 241734.828, "grad_time_ms": 697.758, "load_time_ms": 1.631, "update_time_ms": 2.512}, "timesteps_total": 94800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 94800, "hostname": "cda-server-3", "episode_reward_max": -55.940889508221765}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 19877.09362578392, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -112.77217478784608, "iterations_since_restore": 80, "episodes_total": 1920, "timestamp": 1756414294, "episode_reward_mean": -75.66733800064131, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_22-51-34", "policy_reward_mean": {}, "time_this_iter_s": 248.3183741569519, "episodes_this_iter": 24, "training_iteration": 80, "time_total_s": 19877.09362578392, "info": {"num_steps_sampled": 96000, "num_steps_trained": 96000, "default": {"policy_loss": -0.146415576338768, "vf_explained_var": 0.8998842239379883, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 35.70315933227539, "entropy": 16.746917724609375, "kl": 0.01694786176085472, "total_loss": 35.57390213012695}, "sample_time_ms": 243019.57, "grad_time_ms": 696.957, "load_time_ms": 1.641, "update_time_ms": 2.523}, "timesteps_total": 96000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 96000, "hostname": "cda-server-3", "episode_reward_max": -55.940889508221765}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 20132.509190797806, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -112.77217478784608, "iterations_since_restore": 81, "episodes_total": 1944, "timestamp": 1756414549, "episode_reward_mean": -74.45318096784645, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_22-55-49", "policy_reward_mean": {}, "time_this_iter_s": 255.4155650138855, "episodes_this_iter": 24, "training_iteration": 81, "time_total_s": 20132.509190797806, "info": {"num_steps_sampled": 97200, "num_steps_trained": 97200, "default": {"policy_loss": -0.1437041163444519, "vf_explained_var": 0.8701409697532654, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 36.276241302490234, "entropy": 16.741798400878906, "kl": 0.015728479251265526, "total_loss": 36.14846420288086}, "sample_time_ms": 242784.541, "grad_time_ms": 696.233, "load_time_ms": 1.548, "update_time_ms": 2.548}, "timesteps_total": 97200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 97200, "hostname": "cda-server-3", "episode_reward_max": -55.940889508221765}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 20363.48011994362, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -112.77217478784608, "iterations_since_restore": 82, "episodes_total": 1968, "timestamp": 1756414780, "episode_reward_mean": -72.77709877049519, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_22-59-40", "policy_reward_mean": {}, "time_this_iter_s": 230.970929145813, "episodes_this_iter": 24, "training_iteration": 82, "time_total_s": 20363.48011994362, "info": {"num_steps_sampled": 98400, "num_steps_trained": 98400, "default": {"policy_loss": -0.14084871113300323, "vf_explained_var": 0.8655793070793152, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 34.792381286621094, "entropy": 16.7445068359375, "kl": 0.01601782813668251, "total_loss": 34.66774368286133}, "sample_time_ms": 241397.784, "grad_time_ms": 696.74, "load_time_ms": 1.538, "update_time_ms": 2.54}, "timesteps_total": 98400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 1.0, 
"num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 98400, "hostname": "cda-server-3", "episode_reward_max": -53.95587853910099}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 20580.654803276062, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -107.4522891873826, "iterations_since_restore": 83, "episodes_total": 1992, "timestamp": 1756414997, "episode_reward_mean": -70.81955430992147, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_23-03-17", "policy_reward_mean": {}, "time_this_iter_s": 217.17468333244324, "episodes_this_iter": 24, "training_iteration": 83, "time_total_s": 20580.654803276062, "info": {"num_steps_sampled": 99600, "num_steps_trained": 99600, "default": {"policy_loss": -0.13204234838485718, "vf_explained_var": 0.8760443925857544, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 31.32162094116211, "entropy": 16.793642044067383, "kl": 0.01535502914339304, "total_loss": 31.20512580871582}, "sample_time_ms": 238918.834, "grad_time_ms": 695.57, "load_time_ms": 1.534, "update_time_ms": 2.518}, "timesteps_total": 99600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 99600, "hostname": "cda-server-3", "episode_reward_max": -53.95587853910099}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 20815.180485486984, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -107.4522891873826, "iterations_since_restore": 84, "episodes_total": 2016, "timestamp": 1756415232, "episode_reward_mean": -68.76959735542866, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_23-07-12", "policy_reward_mean": {}, "time_this_iter_s": 234.52568221092224, "episodes_this_iter": 24, "training_iteration": 84, "time_total_s": 20815.180485486984, "info": {"num_steps_sampled": 100800, "num_steps_trained": 100800, "default": {"policy_loss": -0.14433318376541138, "vf_explained_var": 0.8722853064537048, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 29.641008377075195, "entropy": 16.78034210205078, "kl": 0.01663334108889103, "total_loss": 29.51351547241211}, "sample_time_ms": 235965.602, "grad_time_ms": 694.781, "load_time_ms": 1.439, "update_time_ms": 2.549}, "timesteps_total": 100800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 100800, "hostname": "cda-server-3", "episode_reward_max": -53.95587853910099}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 21029.106865644455, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -107.98561338414216, "iterations_since_restore": 85, "episodes_total": 2040, "timestamp": 1756415446, "episode_reward_mean": -69.05470528023939, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_23-10-46", "policy_reward_mean": {}, "time_this_iter_s": 213.9263801574707, "episodes_this_iter": 24, "training_iteration": 85, "time_total_s": 21029.106865644455, "info": {"num_steps_sampled": 102000, "num_steps_trained": 102000, "default": {"policy_loss": -0.14800840616226196, "vf_explained_var": 0.8737674355506897, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 44.28384780883789, "entropy": 16.667724609375, "kl": 0.017794229090213776, "total_loss": 44.15385818481445}, "sample_time_ms": 231016.692, "grad_time_ms": 693.88, "load_time_ms": 1.364, "update_time_ms": 2.556}, "timesteps_total": 102000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 102000, "hostname": "cda-server-3", "episode_reward_max": -53.95587853910099}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 21267.91470694542, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -107.98561338414216, "iterations_since_restore": 86, "episodes_total": 2064, "timestamp": 1756415685, "episode_reward_mean": -69.5951626072507, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_23-14-45", "policy_reward_mean": {}, "time_this_iter_s": 238.80784130096436, "episodes_this_iter": 24, "training_iteration": 86, "time_total_s": 21267.91470694542, "info": {"num_steps_sampled": 103200, "num_steps_trained": 103200, "default": {"policy_loss": -0.1315995305776596, "vf_explained_var": 0.8468186855316162, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 42.02323913574219, "entropy": 16.615787506103516, "kl": 0.016590215265750885, "total_loss": 41.90843963623047}, "sample_time_ms": 232511.672, "grad_time_ms": 693.298, "load_time_ms": 1.332, "update_time_ms": 2.554}, "timesteps_total": 103200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 103200, "hostname": "cda-server-3", "episode_reward_max": -54.95182090997833}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 21523.015555143356, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -107.98561338414216, "iterations_since_restore": 87, "episodes_total": 2088, "timestamp": 1756415940, "episode_reward_mean": -68.46944199107841, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_23-19-00", "policy_reward_mean": {}, "time_this_iter_s": 255.100848197937, "episodes_this_iter": 24, "training_iteration": 87, "time_total_s": 21523.015555143356, "info": {"num_steps_sampled": 104400, "num_steps_trained": 104400, "default": {"policy_loss": -0.13484853506088257, "vf_explained_var": 0.8937379121780396, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 24.742692947387695, "entropy": 16.481712341308594, "kl": 0.01563051901757717, "total_loss": 24.62367057800293}, "sample_time_ms": 233795.722, "grad_time_ms": 693.749, "load_time_ms": 1.314, "update_time_ms": 2.571}, "timesteps_total": 104400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 104400, "hostname": "cda-server-3", "episode_reward_max": -54.95182090997833}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 21755.724896669388, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -107.98561338414216, "iterations_since_restore": 88, "episodes_total": 2112, "timestamp": 1756416173, "episode_reward_mean": -68.90574880241981, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_23-22-53", "policy_reward_mean": {}, "time_this_iter_s": 232.7093415260315, "episodes_this_iter": 24, "training_iteration": 88, "time_total_s": 21755.724896669388, "info": {"num_steps_sampled": 105600, "num_steps_trained": 105600, "default": {"policy_loss": -0.14801417291164398, "vf_explained_var": 0.863982617855072, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 38.028564453125, "entropy": 16.538761138916016, "kl": 0.01715698093175888, "total_loss": 37.89792251586914}, "sample_time_ms": 233985.216, "grad_time_ms": 695.112, "load_time_ms": 1.293, "update_time_ms": 2.564}, "timesteps_total": 105600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 105600, "hostname": "cda-server-3", "episode_reward_max": -53.2230760042775}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 21995.29202914238, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -115.55456980047862, "iterations_since_restore": 89, "episodes_total": 2136, "timestamp": 1756416412, "episode_reward_mean": -68.03962091148274, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_23-26-52", "policy_reward_mean": {}, "time_this_iter_s": 239.56713247299194, "episodes_this_iter": 24, "training_iteration": 89, "time_total_s": 21995.29202914238, "info": {"num_steps_sampled": 106800, "num_steps_trained": 106800, "default": {"policy_loss": -0.13819807767868042, "vf_explained_var": 0.860944390296936, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 40.49483108520508, "entropy": 16.507343292236328, "kl": 0.015339210629463196, "total_loss": 40.37216567993164}, "sample_time_ms": 235947.864, "grad_time_ms": 694.981, "load_time_ms": 1.258, "update_time_ms": 2.587}, "timesteps_total": 106800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 106800, "hostname": "cda-server-3", "episode_reward_max": -53.2230760042775}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 22200.77853822708, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -115.55456980047862, "iterations_since_restore": 90, "episodes_total": 2160, "timestamp": 1756416618, "episode_reward_mean": -66.43045601954503, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_23-30-18", "policy_reward_mean": {}, "time_this_iter_s": 205.48650908470154, "episodes_this_iter": 24, "training_iteration": 90, "time_total_s": 22200.77853822708, "info": {"num_steps_sampled": 108000, "num_steps_trained": 108000, "default": {"policy_loss": -0.1232977956533432, "vf_explained_var": 0.8503206968307495, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 30.72481918334961, "entropy": 16.54416275024414, "kl": 0.015979347750544548, "total_loss": 30.61770248413086}, "sample_time_ms": 231664.322, "grad_time_ms": 695.498, "load_time_ms": 1.195, "update_time_ms": 2.564}, "timesteps_total": 108000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 108000, "hostname": "cda-server-3", "episode_reward_max": -53.2230760042775}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 22468.302712917328, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -115.55456980047862, "iterations_since_restore": 91, "episodes_total": 2184, "timestamp": 1756416885, "episode_reward_mean": -67.06299508675095, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_23-34-45", "policy_reward_mean": {}, "time_this_iter_s": 267.5241746902466, "episodes_this_iter": 24, "training_iteration": 91, "time_total_s": 22468.302712917328, "info": {"num_steps_sampled": 109200, "num_steps_trained": 109200, "default": {"policy_loss": -0.1417466253042221, "vf_explained_var": 0.8252907991409302, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 51.491268157958984, "entropy": 16.51049041748047, "kl": 0.0179302878677845, "total_loss": 51.367679595947266}, "sample_time_ms": 232874.007, "grad_time_ms": 696.569, "load_time_ms": 1.295, "update_time_ms": 2.534}, "timesteps_total": 109200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 109200, "hostname": "cda-server-3", "episode_reward_max": -53.2230760042775}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 22723.705909967422, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -115.55456980047862, "iterations_since_restore": 92, "episodes_total": 2208, "timestamp": 1756417141, "episode_reward_mean": -65.61862931952739, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_23-39-01", "policy_reward_mean": {}, "time_this_iter_s": 255.4031970500946, "episodes_this_iter": 24, "training_iteration": 92, "time_total_s": 22723.705909967422, "info": {"num_steps_sampled": 110400, "num_steps_trained": 110400, "default": {"policy_loss": -0.13279880583286285, "vf_explained_var": 0.9043550491333008, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 18.820959091186523, "entropy": 16.467430114746094, "kl": 0.016397977247834206, "total_loss": 18.70476531982422}, "sample_time_ms": 235317.525, "grad_time_ms": 696.121, "load_time_ms": 1.383, "update_time_ms": 2.551}, "timesteps_total": 110400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 110400, "hostname": "cda-server-3", "episode_reward_max": -53.2230760042775}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 22970.57584619522, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -115.55456980047862, "iterations_since_restore": 93, "episodes_total": 2232, "timestamp": 1756417387, "episode_reward_mean": -64.91044855525129, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_23-43-07", "policy_reward_mean": {}, "time_this_iter_s": 246.86993622779846, "episodes_this_iter": 24, "training_iteration": 93, "time_total_s": 22970.57584619522, "info": {"num_steps_sampled": 111600, "num_steps_trained": 111600, "default": {"policy_loss": -0.1461043804883957, "vf_explained_var": 0.8137485980987549, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 40.37269973754883, "entropy": 16.43319320678711, "kl": 0.017595432698726654, "total_loss": 40.24441146850586}, "sample_time_ms": 238287.126, "grad_time_ms": 695.95, "load_time_ms": 1.431, "update_time_ms": 2.577}, "timesteps_total": 111600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 111600, "hostname": "cda-server-3", "episode_reward_max": -53.897588277465395}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 23212.88718509674, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -98.42147392309447, "iterations_since_restore": 94, "episodes_total": 2256, "timestamp": 1756417630, "episode_reward_mean": -64.81923247327849, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_23-47-10", "policy_reward_mean": {}, "time_this_iter_s": 242.31133890151978, "episodes_this_iter": 24, "training_iteration": 94, "time_total_s": 23212.88718509674, "info": {"num_steps_sampled": 112800, "num_steps_trained": 112800, "default": {"policy_loss": -0.14040905237197876, "vf_explained_var": 0.8580853343009949, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 31.303335189819336, "entropy": 16.410274505615234, "kl": 0.015972889959812164, "total_loss": 31.17909812927246}, "sample_time_ms": 239064.399, "grad_time_ms": 697.275, "load_time_ms": 1.437, "update_time_ms": 2.554}, "timesteps_total": 112800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 112800, "hostname": "cda-server-3", "episode_reward_max": -54.8650017855454}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 23426.63425207138, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -98.42147392309447, "iterations_since_restore": 95, "episodes_total": 2280, "timestamp": 1756417844, "episode_reward_mean": -63.39130856250731, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_23-50-44", "policy_reward_mean": {}, "time_this_iter_s": 213.7470669746399, "episodes_this_iter": 24, "training_iteration": 95, "time_total_s": 23426.63425207138, "info": {"num_steps_sampled": 114000, "num_steps_trained": 114000, "default": {"policy_loss": -0.12778830528259277, "vf_explained_var": 0.8555526733398438, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 26.613889694213867, "entropy": 16.41890525817871, "kl": 0.01759319193661213, "total_loss": 26.503915786743164}, "sample_time_ms": 239045.459, "grad_time_ms": 698.177, "load_time_ms": 1.501, "update_time_ms": 2.552}, "timesteps_total": 114000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 114000, "hostname": "cda-server-3", "episode_reward_max": -54.074040013498}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 23677.734798192978, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -101.42589945490688, "iterations_since_restore": 96, "episodes_total": 2304, "timestamp": 1756418095, "episode_reward_mean": -63.60230280510545, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_23-54-55", "policy_reward_mean": {}, "time_this_iter_s": 251.1005461215973, "episodes_this_iter": 24, "training_iteration": 96, "time_total_s": 23677.734798192978, "info": {"num_steps_sampled": 115200, "num_steps_trained": 115200, "default": {"policy_loss": -0.13580124080181122, "vf_explained_var": 0.8498879075050354, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 35.0389404296875, "entropy": 16.359601974487305, "kl": 0.015551825053989887, "total_loss": 34.918888092041016}, "sample_time_ms": 240275.117, "grad_time_ms": 697.774, "load_time_ms": 1.513, "update_time_ms": 2.554}, "timesteps_total": 115200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 115200, "hostname": "cda-server-3", "episode_reward_max": -52.858943297092495}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 23935.555701732635, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -101.42589945490688, "iterations_since_restore": 97, "episodes_total": 2328, "timestamp": 1756418353, "episode_reward_mean": -64.01519855934126, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-28_23-59-13", "policy_reward_mean": {}, "time_this_iter_s": 257.8209035396576, "episodes_this_iter": 24, "training_iteration": 97, "time_total_s": 23935.555701732635, "info": {"num_steps_sampled": 116400, "num_steps_trained": 116400, "default": {"policy_loss": -0.14195483922958374, "vf_explained_var": 0.8827171921730042, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 26.063066482543945, "entropy": 16.319156646728516, "kl": 0.015602422878146172, "total_loss": 25.936906814575195}, "sample_time_ms": 240547.419, "grad_time_ms": 697.496, "load_time_ms": 1.509, "update_time_ms": 2.535}, "timesteps_total": 116400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 116400, "hostname": "cda-server-3", "episode_reward_max": -52.858943297092495}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 24136.122399806976, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -101.42589945490688, "iterations_since_restore": 98, "episodes_total": 2352, "timestamp": 1756418553, "episode_reward_mean": -63.12411026398803, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_00-02-33", "policy_reward_mean": {}, "time_this_iter_s": 200.56669807434082, "episodes_this_iter": 24, "training_iteration": 98, "time_total_s": 24136.122399806976, "info": {"num_steps_sampled": 117600, "num_steps_trained": 117600, "default": {"policy_loss": -0.12811775505542755, "vf_explained_var": 0.8606259822845459, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 26.62224578857422, "entropy": 16.33074188232422, "kl": 0.01618027687072754, "total_loss": 26.510509490966797}, "sample_time_ms": 237334.149, "grad_time_ms": 696.568, "load_time_ms": 1.489, "update_time_ms": 2.533}, "timesteps_total": 117600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 117600, "hostname": "cda-server-3", "episode_reward_max": -52.858943297092495}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 24407.357256412506, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -101.42589945490688, "iterations_since_restore": 99, "episodes_total": 2376, "timestamp": 1756418824, "episode_reward_mean": -62.44087108395161, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_00-07-04", "policy_reward_mean": {}, "time_this_iter_s": 271.2348566055298, "episodes_this_iter": 24, "training_iteration": 99, "time_total_s": 24407.357256412506, "info": {"num_steps_sampled": 118800, "num_steps_trained": 118800, "default": {"policy_loss": -0.1403992772102356, "vf_explained_var": 0.8786462545394897, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 19.873876571655273, "entropy": 16.25355339050293, "kl": 0.0183703675866127, "total_loss": 19.752073287963867}, "sample_time_ms": 240501.394, "grad_time_ms": 696.21, "load_time_ms": 1.433, "update_time_ms": 2.511}, "timesteps_total": 118800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 118800, "hostname": "cda-server-3", "episode_reward_max": -52.858943297092495}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 24657.685720443726, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -89.90023007400104, "iterations_since_restore": 100, "episodes_total": 2400, "timestamp": 1756419075, "episode_reward_mean": -61.653440944418, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_00-11-15", "policy_reward_mean": {}, "time_this_iter_s": 250.32846403121948, "episodes_this_iter": 24, "training_iteration": 100, "time_total_s": 24657.685720443726, "info": {"num_steps_sampled": 120000, "num_steps_trained": 120000, "default": {"policy_loss": -0.1290886104106903, "vf_explained_var": 0.8327670097351074, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 31.24372100830078, "entropy": 16.272443771362305, "kl": 0.015662631019949913, "total_loss": 31.1304931640625}, "sample_time_ms": 244985.593, "grad_time_ms": 696.091, "load_time_ms": 1.492, "update_time_ms": 2.499}, "timesteps_total": 120000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 120000, "hostname": "cda-server-3", "episode_reward_max": -52.93601767317048}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 24920.84255218506, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -89.90023007400104, "iterations_since_restore": 101, "episodes_total": 2424, "timestamp": 1756419338, "episode_reward_mean": -61.131917472065616, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_00-15-38", "policy_reward_mean": {}, "time_this_iter_s": 263.156831741333, "episodes_this_iter": 24, "training_iteration": 101, "time_total_s": 24920.84255218506, "info": {"num_steps_sampled": 121200, "num_steps_trained": 121200, "default": {"policy_loss": -0.11977836489677429, "vf_explained_var": 0.8679201006889343, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 26.77100372314453, "entropy": 16.270166397094727, "kl": 0.015529219061136246, "total_loss": 26.66695213317871}, "sample_time_ms": 244548.732, "grad_time_ms": 696.194, "load_time_ms": 1.493, "update_time_ms": 2.508}, "timesteps_total": 121200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 121200, "hostname": "cda-server-3", "episode_reward_max": -52.93601767317048}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 25163.190752744675, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -81.1903957303375, "iterations_since_restore": 102, "episodes_total": 2448, "timestamp": 1756419580, "episode_reward_mean": -60.72351474107361, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_00-19-40", "policy_reward_mean": {}, "time_this_iter_s": 242.3482005596161, "episodes_this_iter": 24, "training_iteration": 102, "time_total_s": 25163.190752744675, "info": {"num_steps_sampled": 122400, "num_steps_trained": 122400, "default": {"policy_loss": -0.12940487265586853, "vf_explained_var": 0.882462739944458, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 23.390724182128906, "entropy": 16.316390991210938, "kl": 0.015969369560480118, "total_loss": 23.277488708496094}, "sample_time_ms": 243243.22, "grad_time_ms": 696.232, "load_time_ms": 1.496, "update_time_ms": 2.468}, "timesteps_total": 122400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 122400, "hostname": "cda-server-3", "episode_reward_max": -52.93601767317048}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 25435.75412583351, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -81.1903957303375, "iterations_since_restore": 103, "episodes_total": 2472, "timestamp": 1756419853, "episode_reward_mean": -60.79222265253318, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_00-24-13", "policy_reward_mean": {}, "time_this_iter_s": 272.56337308883667, "episodes_this_iter": 24, "training_iteration": 103, "time_total_s": 25435.75412583351, "info": {"num_steps_sampled": 123600, "num_steps_trained": 123600, "default": {"policy_loss": -0.13579684495925903, "vf_explained_var": 0.8778722882270813, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 22.02468490600586, "entropy": 16.328903198242188, "kl": 0.016616467386484146, "total_loss": 21.90571403503418}, "sample_time_ms": 245811.227, "grad_time_ms": 697.59, "load_time_ms": 1.492, "update_time_ms": 2.459}, "timesteps_total": 123600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 123600, "hostname": "cda-server-3", "episode_reward_max": -52.93601767317048}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 25665.85821557045, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -81.1903957303375, "iterations_since_restore": 104, "episodes_total": 2496, "timestamp": 1756420083, "episode_reward_mean": -60.116771525483344, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_00-28-03", "policy_reward_mean": {}, "time_this_iter_s": 230.10408973693848, "episodes_this_iter": 24, "training_iteration": 104, "time_total_s": 25665.85821557045, "info": {"num_steps_sampled": 124800, "num_steps_trained": 124800, "default": {"policy_loss": -0.15344049036502838, "vf_explained_var": 0.8817589282989502, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 18.401994705200195, "entropy": 16.331209182739258, "kl": 0.016265608370304108, "total_loss": 18.26502227783203}, "sample_time_ms": 244590.369, "grad_time_ms": 697.667, "load_time_ms": 1.531, "update_time_ms": 2.454}, "timesteps_total": 124800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 124800, "hostname": "cda-server-3", "episode_reward_max": -52.95849628922025}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 25892.035324811935, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -82.16212772395187, "iterations_since_restore": 105, "episodes_total": 2520, "timestamp": 1756420309, "episode_reward_mean": -60.16023217998311, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_00-31-49", "policy_reward_mean": {}, "time_this_iter_s": 226.1771092414856, "episodes_this_iter": 24, "training_iteration": 105, "time_total_s": 25892.035324811935, "info": {"num_steps_sampled": 126000, "num_steps_trained": 126000, "default": {"policy_loss": -0.12750448286533356, "vf_explained_var": 0.8136303424835205, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 38.1964111328125, "entropy": 16.27743148803711, "kl": 0.016278643161058426, "total_loss": 38.08538818359375}, "sample_time_ms": 245834.201, "grad_time_ms": 697.003, "load_time_ms": 1.436, "update_time_ms": 2.444}, "timesteps_total": 126000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 126000, "hostname": "cda-server-3", "episode_reward_max": -52.93952025325732}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 26110.90698647499, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -82.16212772395187, "iterations_since_restore": 106, "episodes_total": 2544, "timestamp": 1756420528, "episode_reward_mean": -59.81542332779563, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_00-35-28", "policy_reward_mean": {}, "time_this_iter_s": 218.87166166305542, "episodes_this_iter": 24, "training_iteration": 106, "time_total_s": 26110.90698647499, "info": {"num_steps_sampled": 127200, "num_steps_trained": 127200, "default": {"policy_loss": -0.13325509428977966, "vf_explained_var": 0.8773702383041382, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 22.744340896606445, "entropy": 16.201231002807617, "kl": 0.016619432717561722, "total_loss": 22.627914428710938}, "sample_time_ms": 242611.106, "grad_time_ms": 697.237, "load_time_ms": 1.401, "update_time_ms": 2.481}, "timesteps_total": 127200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 127200, "hostname": "cda-server-3", "episode_reward_max": -52.914738431937806}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 26360.525168180466, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -82.16212772395187, "iterations_since_restore": 107, "episodes_total": 2568, "timestamp": 1756420778, "episode_reward_mean": -59.673469220947396, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_00-39-38", "policy_reward_mean": {}, "time_this_iter_s": 249.61818170547485, "episodes_this_iter": 24, "training_iteration": 107, "time_total_s": 26360.525168180466, "info": {"num_steps_sampled": 128400, "num_steps_trained": 128400, "default": {"policy_loss": -0.12265331298112869, "vf_explained_var": 0.8668314218521118, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 21.25311279296875, "entropy": 16.13929557800293, "kl": 0.017199309542775154, "total_loss": 21.14787483215332}, "sample_time_ms": 241790.366, "grad_time_ms": 697.759, "load_time_ms": 1.38, "update_time_ms": 2.478}, "timesteps_total": 128400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 128400, "hostname": "cda-server-3", "episode_reward_max": -51.02603246046728}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 26604.6365506649, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -82.16212772395187, "iterations_since_restore": 108, "episodes_total": 2592, "timestamp": 1756421022, "episode_reward_mean": -59.606878303662, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_00-43-42", "policy_reward_mean": {}, "time_this_iter_s": 244.11138248443604, "episodes_this_iter": 24, "training_iteration": 108, "time_total_s": 26604.6365506649, "info": {"num_steps_sampled": 129600, "num_steps_trained": 129600, "default": {"policy_loss": -0.13076123595237732, "vf_explained_var": 0.8132724761962891, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 33.19261169433594, "entropy": 16.225126266479492, "kl": 0.01657184027135372, "total_loss": 33.07863235473633}, "sample_time_ms": 246144.027, "grad_time_ms": 698.614, "load_time_ms": 1.333, "update_time_ms": 2.485}, "timesteps_total": 129600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 129600, "hostname": "cda-server-3", "episode_reward_max": -51.02603246046728}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 26834.84356546402, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -84.93840741162363, "iterations_since_restore": 109, "episodes_total": 2616, "timestamp": 1756421252, "episode_reward_mean": -59.694966777893185, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_00-47-32", "policy_reward_mean": {}, "time_this_iter_s": 230.20701479911804, "episodes_this_iter": 24, "training_iteration": 109, "time_total_s": 26834.84356546402, "info": {"num_steps_sampled": 130800, "num_steps_trained": 130800, "default": {"policy_loss": -0.12276914715766907, "vf_explained_var": 0.85801762342453, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 28.201007843017578, "entropy": 16.107158660888672, "kl": 0.015431146137416363, "total_loss": 28.093862533569336}, "sample_time_ms": 242041.052, "grad_time_ms": 698.686, "load_time_ms": 1.388, "update_time_ms": 2.481}, "timesteps_total": 130800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 130800, "hostname": "cda-server-3", "episode_reward_max": -50.069767460137605}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 27092.147441625595, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -84.93840741162363, "iterations_since_restore": 110, "episodes_total": 2640, "timestamp": 1756421509, "episode_reward_mean": -59.04001522812641, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_00-51-49", "policy_reward_mean": {}, "time_this_iter_s": 257.3038761615753, "episodes_this_iter": 24, "training_iteration": 110, "time_total_s": 27092.147441625595, "info": {"num_steps_sampled": 132000, "num_steps_trained": 132000, "default": {"policy_loss": -0.13041992485523224, "vf_explained_var": 0.8788143396377563, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 20.625926971435547, "entropy": 16.039676666259766, "kl": 0.017292585223913193, "total_loss": 20.513015747070312}, "sample_time_ms": 242737.964, "grad_time_ms": 699.245, "load_time_ms": 1.426, "update_time_ms": 2.5}, "timesteps_total": 132000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 132000, "hostname": "cda-server-3", "episode_reward_max": -50.069767460137605}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 27331.856004953384, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -84.93840741162363, "iterations_since_restore": 111, "episodes_total": 2664, "timestamp": 1756421749, "episode_reward_mean": -59.07273972534611, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_00-55-49", "policy_reward_mean": {}, "time_this_iter_s": 239.7085633277893, "episodes_this_iter": 24, "training_iteration": 111, "time_total_s": 27331.856004953384, "info": {"num_steps_sampled": 133200, "num_steps_trained": 133200, "default": {"policy_loss": -0.12182916700839996, "vf_explained_var": 0.8311696648597717, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 29.08080291748047, "entropy": 16.15522003173828, "kl": 0.016714682802557945, "total_loss": 28.97589874267578}, "sample_time_ms": 240394.014, "grad_time_ms": 698.393, "load_time_ms": 1.415, "update_time_ms": 2.512}, "timesteps_total": 133200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 133200, "hostname": "cda-server-3", "episode_reward_max": -50.069767460137605}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 27587.61087012291, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -86.78311202087484, "iterations_since_restore": 112, "episodes_total": 2688, "timestamp": 1756422005, "episode_reward_mean": -59.18881358171987, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_01-00-05", "policy_reward_mean": {}, "time_this_iter_s": 255.75486516952515, "episodes_this_iter": 24, "training_iteration": 112, "time_total_s": 27587.61087012291, "info": {"num_steps_sampled": 134400, "num_steps_trained": 134400, "default": {"policy_loss": -0.140395849943161, "vf_explained_var": 0.8482707738876343, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 28.473026275634766, "entropy": 16.06305694580078, "kl": 0.016294434666633606, "total_loss": 28.3491268157959}, "sample_time_ms": 241734.136, "grad_time_ms": 698.843, "load_time_ms": 1.411, "update_time_ms": 2.56}, "timesteps_total": 134400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 134400, "hostname": "cda-server-3", "episode_reward_max": -50.069767460137605}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 27806.619978904724, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -86.78311202087484, "iterations_since_restore": 113, "episodes_total": 2712, "timestamp": 1756422224, "episode_reward_mean": -58.635297871876844, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_01-03-44", "policy_reward_mean": {}, "time_this_iter_s": 219.00910878181458, "episodes_this_iter": 24, "training_iteration": 113, "time_total_s": 27806.619978904724, "info": {"num_steps_sampled": 135600, "num_steps_trained": 135600, "default": {"policy_loss": -0.1381041407585144, "vf_explained_var": 0.9284831285476685, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 11.675448417663574, "entropy": 16.15050506591797, "kl": 0.016876710578799248, "total_loss": 11.554431915283203}, "sample_time_ms": 236379.743, "grad_time_ms": 697.874, "load_time_ms": 1.368, "update_time_ms": 2.548}, "timesteps_total": 135600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 135600, "hostname": "cda-server-3", "episode_reward_max": -52.83280264414459}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 28034.539868831635, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -92.99670859655961, "iterations_since_restore": 114, "episodes_total": 2736, "timestamp": 1756422452, "episode_reward_mean": -58.661219019914526, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_01-07-32", "policy_reward_mean": {}, "time_this_iter_s": 227.9198899269104, "episodes_this_iter": 24, "training_iteration": 114, "time_total_s": 28034.539868831635, "info": {"num_steps_sampled": 136800, "num_steps_trained": 136800, "default": {"policy_loss": -0.13784296810626984, "vf_explained_var": 0.8285303115844727, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 34.57063293457031, "entropy": 16.13674545288086, "kl": 0.016130059957504272, "total_loss": 34.44912338256836}, "sample_time_ms": 236162.246, "grad_time_ms": 696.885, "load_time_ms": 1.405, "update_time_ms": 2.553}, "timesteps_total": 136800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 136800, "hostname": "cda-server-3", "episode_reward_max": -52.83280264414459}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 28336.070405721664, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -92.99670859655961, "iterations_since_restore": 115, "episodes_total": 2760, "timestamp": 1756422753, "episode_reward_mean": -58.39664888282129, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_01-12-33", "policy_reward_mean": {}, "time_this_iter_s": 301.5305368900299, "episodes_this_iter": 24, "training_iteration": 115, "time_total_s": 28336.070405721664, "info": {"num_steps_sampled": 138000, "num_steps_trained": 138000, "default": {"policy_loss": -0.12080780416727066, "vf_explained_var": 0.8865867853164673, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 17.852455139160156, "entropy": 16.0211238861084, "kl": 0.015340049751102924, "total_loss": 17.747180938720703}, "sample_time_ms": 243697.146, "grad_time_ms": 697.206, "load_time_ms": 1.493, "update_time_ms": 2.538}, "timesteps_total": 138000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 138000, "hostname": "cda-server-3", "episode_reward_max": -51.878619471983534}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 28640.148250341415, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -92.99670859655961, "iterations_since_restore": 116, "episodes_total": 2784, "timestamp": 1756423057, "episode_reward_mean": -57.572993058078616, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_01-17-37", "policy_reward_mean": {}, "time_this_iter_s": 304.077844619751, "episodes_this_iter": 24, "training_iteration": 116, "time_total_s": 28640.148250341415, "info": {"num_steps_sampled": 139200, "num_steps_trained": 139200, "default": {"policy_loss": -0.1292750984430313, "vf_explained_var": 0.8631255030632019, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 22.42864227294922, "entropy": 16.13391876220703, "kl": 0.01578795537352562, "total_loss": 22.315351486206055}, "sample_time_ms": 252216.644, "grad_time_ms": 698.308, "load_time_ms": 1.533, "update_time_ms": 2.518}, "timesteps_total": 139200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 139200, "hostname": "cda-server-3", "episode_reward_max": -51.878619471983534}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 28893.488532304764, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -92.99670859655961, "iterations_since_restore": 117, "episodes_total": 2808, "timestamp": 1756423311, "episode_reward_mean": -58.42551707762653, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_01-21-51", "policy_reward_mean": {}, "time_this_iter_s": 253.3402819633484, "episodes_this_iter": 24, "training_iteration": 117, "time_total_s": 28893.488532304764, "info": {"num_steps_sampled": 140400, "num_steps_trained": 140400, "default": {"policy_loss": -0.1352321207523346, "vf_explained_var": 0.8869233727455139, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 21.718400955200195, "entropy": 16.096532821655273, "kl": 0.014851750805974007, "total_loss": 21.59820556640625}, "sample_time_ms": 252588.37, "grad_time_ms": 698.676, "load_time_ms": 1.574, "update_time_ms": 2.512}, "timesteps_total": 140400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 140400, "hostname": "cda-server-3", "episode_reward_max": -51.878619471983534}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 29127.301443338394, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -88.65954468392255, "iterations_since_restore": 118, "episodes_total": 2832, "timestamp": 1756423544, "episode_reward_mean": -58.24708847794195, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_01-25-44", "policy_reward_mean": {}, "time_this_iter_s": 233.81291103363037, "episodes_this_iter": 24, "training_iteration": 118, "time_total_s": 29127.301443338394, "info": {"num_steps_sampled": 141600, "num_steps_trained": 141600, "default": {"policy_loss": -0.13132750988006592, "vf_explained_var": 0.7502151727676392, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 54.89358901977539, "entropy": 16.050901412963867, "kl": 0.015956096351146698, "total_loss": 54.778411865234375}, "sample_time_ms": 251559.104, "grad_time_ms": 698.078, "load_time_ms": 1.579, "update_time_ms": 2.541}, "timesteps_total": 141600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 141600, "hostname": "cda-server-3", "episode_reward_max": -51.86353434737764}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 29348.139184951782, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -88.65954468392255, "iterations_since_restore": 119, "episodes_total": 2856, "timestamp": 1756423765, "episode_reward_mean": -58.77166939777696, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_01-29-25", "policy_reward_mean": {}, "time_this_iter_s": 220.83774161338806, "episodes_this_iter": 24, "training_iteration": 119, "time_total_s": 29348.139184951782, "info": {"num_steps_sampled": 142800, "num_steps_trained": 142800, "default": {"policy_loss": -0.1422090232372284, "vf_explained_var": 0.9099141359329224, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 15.585625648498535, "entropy": 15.948657989501953, "kl": 0.01609078049659729, "total_loss": 15.459708213806152}, "sample_time_ms": 250621.264, "grad_time_ms": 699.035, "load_time_ms": 1.565, "update_time_ms": 2.559}, "timesteps_total": 142800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 142800, "hostname": "cda-server-3", "episode_reward_max": -51.86353434737764}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 29608.62323451042, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -88.65954468392255, "iterations_since_restore": 120, "episodes_total": 2880, "timestamp": 1756424026, "episode_reward_mean": -59.42504088928788, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_01-33-46", "policy_reward_mean": {}, "time_this_iter_s": 260.4840495586395, "episodes_this_iter": 24, "training_iteration": 120, "time_total_s": 29608.62323451042, "info": {"num_steps_sampled": 144000, "num_steps_trained": 144000, "default": {"policy_loss": -0.1321364790201187, "vf_explained_var": 0.8487840890884399, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 26.277753829956055, "entropy": 15.773978233337402, "kl": 0.01596074178814888, "total_loss": 26.16177749633789}, "sample_time_ms": 250940.818, "grad_time_ms": 697.606, "load_time_ms": 1.492, "update_time_ms": 2.549}, "timesteps_total": 144000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 144000, "hostname": "cda-server-3", "episode_reward_max": -51.15904062506867}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 29864.01040172577, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -105.43882619369613, "iterations_since_restore": 121, "episodes_total": 2904, "timestamp": 1756424281, "episode_reward_mean": -59.08303499192223, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_01-38-01", "policy_reward_mean": {}, "time_this_iter_s": 255.3871672153473, "episodes_this_iter": 24, "training_iteration": 121, "time_total_s": 29864.01040172577, "info": {"num_steps_sampled": 145200, "num_steps_trained": 145200, "default": {"policy_loss": -0.11356958746910095, "vf_explained_var": 0.7982986569404602, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 45.23063278198242, "entropy": 15.830936431884766, "kl": 0.01379266008734703, "total_loss": 45.13102722167969}, "sample_time_ms": 252508.147, "grad_time_ms": 698.153, "load_time_ms": 1.499, "update_time_ms": 2.525}, "timesteps_total": 145200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 145200, "hostname": "cda-server-3", "episode_reward_max": -51.15904062506867}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 30100.95377969742, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -105.43882619369613, "iterations_since_restore": 122, "episodes_total": 2928, "timestamp": 1756424518, "episode_reward_mean": -58.620452296311754, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_01-41-58", "policy_reward_mean": {}, "time_this_iter_s": 236.94337797164917, "episodes_this_iter": 24, "training_iteration": 122, "time_total_s": 30100.95377969742, "info": {"num_steps_sampled": 146400, "num_steps_trained": 146400, "default": {"policy_loss": -0.14042048156261444, "vf_explained_var": 0.9276683330535889, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 10.557514190673828, "entropy": 15.92667293548584, "kl": 0.016386190429329872, "total_loss": 10.433683395385742}, "sample_time_ms": 250627.109, "grad_time_ms": 698.127, "load_time_ms": 1.5, "update_time_ms": 2.486}, "timesteps_total": 146400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 146400, "hostname": "cda-server-3", "episode_reward_max": -51.15904062506867}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 30326.80412006378, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -105.43882619369613, "iterations_since_restore": 123, "episodes_total": 2952, "timestamp": 1756424744, "episode_reward_mean": -58.52559615811242, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_01-45-44", "policy_reward_mean": {}, "time_this_iter_s": 225.85034036636353, "episodes_this_iter": 24, "training_iteration": 123, "time_total_s": 30326.80412006378, "info": {"num_steps_sampled": 147600, "num_steps_trained": 147600, "default": {"policy_loss": -0.13525259494781494, "vf_explained_var": 0.8784549236297607, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 20.95810317993164, "entropy": 15.93128776550293, "kl": 0.014947210438549519, "total_loss": 20.837984085083008}, "sample_time_ms": 251310.891, "grad_time_ms": 698.384, "load_time_ms": 1.545, "update_time_ms": 2.509}, "timesteps_total": 147600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 147600, "hostname": "cda-server-3", "episode_reward_max": -51.15904062506867}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 30578.466166734695, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -105.43882619369613, "iterations_since_restore": 124, "episodes_total": 2976, "timestamp": 1756424996, "episode_reward_mean": -57.50727125609862, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_01-49-56", "policy_reward_mean": {}, "time_this_iter_s": 251.6620466709137, "episodes_this_iter": 24, "training_iteration": 124, "time_total_s": 30578.466166734695, "info": {"num_steps_sampled": 148800, "num_steps_trained": 148800, "default": {"policy_loss": -0.1361684650182724, "vf_explained_var": 0.8873589634895325, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 18.034313201904297, "entropy": 15.900761604309082, "kl": 0.01829693093895912, "total_loss": 17.916669845581055}, "sample_time_ms": 253684.863, "grad_time_ms": 698.555, "load_time_ms": 1.553, "update_time_ms": 2.52}, "timesteps_total": 148800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 148800, "hostname": "cda-server-3", "episode_reward_max": -51.15904062506867}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 30794.069765806198, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -86.28003722489746, "iterations_since_restore": 125, "episodes_total": 3000, "timestamp": 1756425211, "episode_reward_mean": -56.68986158951923, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_01-53-31", "policy_reward_mean": {}, "time_this_iter_s": 215.60359907150269, "episodes_this_iter": 24, "training_iteration": 125, "time_total_s": 30794.069765806198, "info": {"num_steps_sampled": 150000, "num_steps_trained": 150000, "default": {"policy_loss": -0.14660833775997162, "vf_explained_var": 0.9275010228157043, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 11.063407897949219, "entropy": 15.772184371948242, "kl": 0.016784558072686195, "total_loss": 10.933794021606445}, "sample_time_ms": 245091.181, "grad_time_ms": 699.565, "load_time_ms": 1.515, "update_time_ms": 2.549}, "timesteps_total": 150000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 150000, "hostname": "cda-server-3", "episode_reward_max": -51.562097171397795}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 31028.55344748497, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -86.28003722489746, "iterations_since_restore": 126, "episodes_total": 3024, "timestamp": 1756425446, "episode_reward_mean": -56.94375005421898, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_01-57-26", "policy_reward_mean": {}, "time_this_iter_s": 234.48368167877197, "episodes_this_iter": 24, "training_iteration": 126, "time_total_s": 31028.55344748497, "info": {"num_steps_sampled": 151200, "num_steps_trained": 151200, "default": {"policy_loss": -0.12616945803165436, "vf_explained_var": 0.9062788486480713, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 14.618449211120605, "entropy": 15.753960609436035, "kl": 0.017071321606636047, "total_loss": 14.509563446044922}, "sample_time_ms": 238132.026, "grad_time_ms": 699.342, "load_time_ms": 1.5, "update_time_ms": 2.547}, "timesteps_total": 151200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 151200, "hostname": "cda-server-3", "episode_reward_max": -51.562097171397795}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 31243.675163269043, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -80.45088177963235, "iterations_since_restore": 127, "episodes_total": 3048, "timestamp": 1756425661, "episode_reward_mean": -56.65103369765881, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_02-01-01", "policy_reward_mean": {}, "time_this_iter_s": 215.12171578407288, "episodes_this_iter": 24, "training_iteration": 127, "time_total_s": 31243.675163269043, "info": {"num_steps_sampled": 152400, "num_steps_trained": 152400, "default": {"policy_loss": -0.11479911208152771, "vf_explained_var": 0.7940958738327026, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 36.7393913269043, "entropy": 15.598017692565918, "kl": 0.015916500240564346, "total_loss": 36.640708923339844}, "sample_time_ms": 234310.54, "grad_time_ms": 699.01, "load_time_ms": 1.489, "update_time_ms": 2.553}, "timesteps_total": 152400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 152400, "hostname": "cda-server-3", "episode_reward_max": -51.562097171397795}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 31459.257354974747, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -79.52035184489046, "iterations_since_restore": 128, "episodes_total": 3072, "timestamp": 1756425876, "episode_reward_mean": -56.38341833475086, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_02-04-36", "policy_reward_mean": {}, "time_this_iter_s": 215.58219170570374, "episodes_this_iter": 24, "training_iteration": 128, "time_total_s": 31459.257354974747, "info": {"num_steps_sampled": 153600, "num_steps_trained": 153600, "default": {"policy_loss": -0.14602722227573395, "vf_explained_var": 0.9135898947715759, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 12.709991455078125, "entropy": 15.712790489196777, "kl": 0.017770998179912567, "total_loss": 12.58195686340332}, "sample_time_ms": 232487.546, "grad_time_ms": 698.863, "load_time_ms": 1.556, "update_time_ms": 2.519}, "timesteps_total": 153600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 153600, "hostname": "cda-server-3", "episode_reward_max": -51.562097171397795}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 31731.1027405262, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -79.52035184489046, "iterations_since_restore": 129, "episodes_total": 3096, "timestamp": 1756426148, "episode_reward_mean": -56.568582278886524, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_02-09-08", "policy_reward_mean": {}, "time_this_iter_s": 271.84538555145264, "episodes_this_iter": 24, "training_iteration": 129, "time_total_s": 31731.1027405262, "info": {"num_steps_sampled": 154800, "num_steps_trained": 154800, "default": {"policy_loss": -0.12171263247728348, "vf_explained_var": 0.8592672348022461, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 21.35623550415039, "entropy": 15.582194328308105, "kl": 0.016301354393363, "total_loss": 21.251028060913086}, "sample_time_ms": 237589.432, "grad_time_ms": 697.683, "load_time_ms": 1.595, "update_time_ms": 2.499}, "timesteps_total": 154800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 154800, "hostname": "cda-server-3", "episode_reward_max": -51.830023605268046}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 31959.038396835327, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -79.52035184489046, "iterations_since_restore": 130, "episodes_total": 3120, "timestamp": 1756426376, "episode_reward_mean": -56.192528320350384, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_02-12-56", "policy_reward_mean": {}, "time_this_iter_s": 227.9356563091278, "episodes_this_iter": 24, "training_iteration": 130, "time_total_s": 31959.038396835327, "info": {"num_steps_sampled": 156000, "num_steps_trained": 156000, "default": {"policy_loss": -0.12034373730421066, "vf_explained_var": 0.9029307961463928, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 14.961315155029297, "entropy": 15.58940315246582, "kl": 0.01572321727871895, "total_loss": 14.856891632080078}, "sample_time_ms": 234334.761, "grad_time_ms": 697.659, "load_time_ms": 1.537, "update_time_ms": 2.496}, "timesteps_total": 156000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 156000, "hostname": "cda-server-3", "episode_reward_max": -51.19035379947645}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 32195.998419046402, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -70.91395079921254, "iterations_since_restore": 131, "episodes_total": 3144, "timestamp": 1756426613, "episode_reward_mean": -55.76523066885816, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_02-16-53", "policy_reward_mean": {}, "time_this_iter_s": 236.96002221107483, "episodes_this_iter": 24, "training_iteration": 131, "time_total_s": 32195.998419046402, "info": {"num_steps_sampled": 157200, "num_steps_trained": 157200, "default": {"policy_loss": -0.13559547066688538, "vf_explained_var": 0.9158918857574463, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 12.444634437561035, "entropy": 15.584784507751465, "kl": 0.016098035499453545, "total_loss": 12.325338363647461}, "sample_time_ms": 232491.655, "grad_time_ms": 697.978, "load_time_ms": 1.559, "update_time_ms": 2.546}, "timesteps_total": 157200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 157200, "hostname": "cda-server-3", "episode_reward_max": -51.09991333542589}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 32449.557423353195, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -70.91395079921254, "iterations_since_restore": 132, "episodes_total": 3168, "timestamp": 1756426867, "episode_reward_mean": -55.86116142206185, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_02-21-07", "policy_reward_mean": {}, "time_this_iter_s": 253.5590043067932, "episodes_this_iter": 24, "training_iteration": 132, "time_total_s": 32449.557423353195, "info": {"num_steps_sampled": 158400, "num_steps_trained": 158400, "default": {"policy_loss": -0.12703874707221985, "vf_explained_var": 0.926753044128418, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 10.651493072509766, "entropy": 15.580692291259766, "kl": 0.016220103949308395, "total_loss": 10.540875434875488}, "sample_time_ms": 234153.813, "grad_time_ms": 697.494, "load_time_ms": 1.499, "update_time_ms": 2.553}, "timesteps_total": 158400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 158400, "hostname": "cda-server-3", "episode_reward_max": -51.09991333542589}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 32706.75931406021, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -93.98838629496043, "iterations_since_restore": 133, "episodes_total": 3192, "timestamp": 1756427124, "episode_reward_mean": -56.2012627641873, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_02-25-24", "policy_reward_mean": {}, "time_this_iter_s": 257.201890707016, "episodes_this_iter": 24, "training_iteration": 133, "time_total_s": 32706.75931406021, "info": {"num_steps_sampled": 159600, "num_steps_trained": 159600, "default": {"policy_loss": -0.1338438093662262, "vf_explained_var": 0.8740109205245972, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 23.77425193786621, "entropy": 15.500330924987793, "kl": 0.015482652932405472, "total_loss": 23.656084060668945}, "sample_time_ms": 237288.091, "grad_time_ms": 698.382, "load_time_ms": 1.487, "update_time_ms": 2.552}, "timesteps_total": 159600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 159600, "hostname": "cda-server-3", "episode_reward_max": -51.09991333542589}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 32972.81243276596, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -93.98838629496043, "iterations_since_restore": 134, "episodes_total": 3216, "timestamp": 1756427390, "episode_reward_mean": -56.35912897300799, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_02-29-50", "policy_reward_mean": {}, "time_this_iter_s": 266.0531187057495, "episodes_this_iter": 24, "training_iteration": 134, "time_total_s": 32972.81243276596, "info": {"num_steps_sampled": 160800, "num_steps_trained": 160800, "default": {"policy_loss": -0.13807255029678345, "vf_explained_var": 0.9234582185745239, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 12.561636924743652, "entropy": 15.425944328308105, "kl": 0.01584099791944027, "total_loss": 12.439602851867676}, "sample_time_ms": 238727.197, "grad_time_ms": 698.404, "load_time_ms": 1.465, "update_time_ms": 2.538}, "timesteps_total": 160800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 160800, "hostname": "cda-server-3", "episode_reward_max": -51.09991333542589}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 33229.83745789528, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -93.98838629496043, "iterations_since_restore": 135, "episodes_total": 3240, "timestamp": 1756427647, "episode_reward_mean": -56.45386895016317, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_02-34-07", "policy_reward_mean": {}, "time_this_iter_s": 257.02502512931824, "episodes_this_iter": 24, "training_iteration": 135, "time_total_s": 33229.83745789528, "info": {"num_steps_sampled": 162000, "num_steps_trained": 162000, "default": {"policy_loss": -0.12910763919353485, "vf_explained_var": 0.9062867760658264, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 13.637471199035645, "entropy": 15.366029739379883, "kl": 0.01572471857070923, "total_loss": 13.524285316467285}, "sample_time_ms": 242870.088, "grad_time_ms": 697.763, "load_time_ms": 1.412, "update_time_ms": 2.526}, "timesteps_total": 162000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 162000, "hostname": "cda-server-3", "episode_reward_max": -51.69027924314964}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 33496.372004032135, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -96.18207900565379, "iterations_since_restore": 136, "episodes_total": 3264, "timestamp": 1756427914, "episode_reward_mean": -56.74371575273542, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_02-38-34", "policy_reward_mean": {}, "time_this_iter_s": 266.5345461368561, "episodes_this_iter": 24, "training_iteration": 136, "time_total_s": 33496.372004032135, "info": {"num_steps_sampled": 163200, "num_steps_trained": 163200, "default": {"policy_loss": -0.12022534012794495, "vf_explained_var": 0.8324052095413208, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 36.34650421142578, "entropy": 15.361777305603027, "kl": 0.013458560220897198, "total_loss": 36.239906311035156}, "sample_time_ms": 246076.3, "grad_time_ms": 696.66, "load_time_ms": 1.378, "update_time_ms": 2.542}, "timesteps_total": 163200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 163200, "hostname": "cda-server-3", "episode_reward_max": -51.69027924314964}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 33734.41650533676, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -96.18207900565379, "iterations_since_restore": 137, "episodes_total": 3288, "timestamp": 1756428152, "episode_reward_mean": -56.32144548975378, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_02-42-32", "policy_reward_mean": {}, "time_this_iter_s": 238.04450130462646, "episodes_this_iter": 24, "training_iteration": 137, "time_total_s": 33734.41650533676, "info": {"num_steps_sampled": 164400, "num_steps_trained": 164400, "default": {"policy_loss": -0.14246992766857147, "vf_explained_var": 0.8585296273231506, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 23.249908447265625, "entropy": 15.368772506713867, "kl": 0.0162531528621912, "total_loss": 23.12389373779297}, "sample_time_ms": 248368.943, "grad_time_ms": 696.335, "load_time_ms": 1.371, "update_time_ms": 2.531}, "timesteps_total": 164400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 164400, "hostname": "cda-server-3", "episode_reward_max": -51.40951762538001}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 33984.96528124809, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -96.18207900565379, "iterations_since_restore": 138, "episodes_total": 3312, "timestamp": 1756428402, "episode_reward_mean": -55.87355432583267, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_02-46-42", "policy_reward_mean": {}, "time_this_iter_s": 250.54877591133118, "episodes_this_iter": 24, "training_iteration": 138, "time_total_s": 33984.96528124809, "info": {"num_steps_sampled": 165600, "num_steps_trained": 165600, "default": {"policy_loss": -0.1364402174949646, "vf_explained_var": 0.9217305779457092, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 11.053638458251953, "entropy": 15.37063980102539, "kl": 0.016554994508624077, "total_loss": 10.933959007263184}, "sample_time_ms": 251865.386, "grad_time_ms": 696.52, "load_time_ms": 1.384, "update_time_ms": 2.537}, "timesteps_total": 165600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 165600, "hostname": "cda-server-3", "episode_reward_max": -51.40951762538001}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 34215.42980790138, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -96.18207900565379, "iterations_since_restore": 139, "episodes_total": 3336, "timestamp": 1756428633, "episode_reward_mean": -55.7571264546207, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_02-50-33", "policy_reward_mean": {}, "time_this_iter_s": 230.4645266532898, "episodes_this_iter": 24, "training_iteration": 139, "time_total_s": 34215.42980790138, "info": {"num_steps_sampled": 166800, "num_steps_trained": 166800, "default": {"policy_loss": -0.11645391583442688, "vf_explained_var": 0.9018339514732361, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 14.361066818237305, "entropy": 15.37358570098877, "kl": 0.014754108153283596, "total_loss": 14.259552001953125}, "sample_time_ms": 247727.037, "grad_time_ms": 696.858, "load_time_ms": 1.332, "update_time_ms": 2.536}, "timesteps_total": 166800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 166800, "hostname": "cda-server-3", "episode_reward_max": -51.40951762538001}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 34444.063520908356, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -82.93349279790904, "iterations_since_restore": 140, "episodes_total": 3360, "timestamp": 1756428861, "episode_reward_mean": -55.31171767952917, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_02-54-21", "policy_reward_mean": {}, "time_this_iter_s": 228.63371300697327, "episodes_this_iter": 24, "training_iteration": 140, "time_total_s": 34444.063520908356, "info": {"num_steps_sampled": 168000, "num_steps_trained": 168000, "default": {"policy_loss": -0.1265363097190857, "vf_explained_var": 0.9019301533699036, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 15.231989860534668, "entropy": 15.134800910949707, "kl": 0.015528642572462559, "total_loss": 15.121174812316895}, "sample_time_ms": 247796.422, "grad_time_ms": 697.2, "load_time_ms": 1.367, "update_time_ms": 2.552}, "timesteps_total": 168000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 168000, "hostname": "cda-server-3", "episode_reward_max": -51.40951762538001}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 34734.807121276855, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -82.93349279790904, "iterations_since_restore": 141, "episodes_total": 3384, "timestamp": 1756429152, "episode_reward_mean": -55.52611278640221, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_02-59-12", "policy_reward_mean": {}, "time_this_iter_s": 290.74360036849976, "episodes_this_iter": 24, "training_iteration": 141, "time_total_s": 34734.807121276855, "info": {"num_steps_sampled": 169200, "num_steps_trained": 169200, "default": {"policy_loss": -0.12432999163866043, "vf_explained_var": 0.8699341416358948, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 20.217525482177734, "entropy": 15.227035522460938, "kl": 0.015340043231844902, "total_loss": 20.108726501464844}, "sample_time_ms": 253175.993, "grad_time_ms": 695.989, "load_time_ms": 1.347, "update_time_ms": 2.547}, "timesteps_total": 169200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 169200, "hostname": "cda-server-3", "episode_reward_max": -51.912489943053544}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 35009.46830415726, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -68.6920622405596, "iterations_since_restore": 142, "episodes_total": 3408, "timestamp": 1756429427, "episode_reward_mean": -55.2943012698868, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_03-03-47", "policy_reward_mean": {}, "time_this_iter_s": 274.6611828804016, "episodes_this_iter": 24, "training_iteration": 142, "time_total_s": 35009.46830415726, "info": {"num_steps_sampled": 170400, "num_steps_trained": 170400, "default": {"policy_loss": -0.12539464235305786, "vf_explained_var": 0.8912346959114075, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 16.17151641845703, "entropy": 15.258182525634766, "kl": 0.016979189589619637, "total_loss": 16.063312530517578}, "sample_time_ms": 255286.282, "grad_time_ms": 695.94, "load_time_ms": 1.322, "update_time_ms": 2.54}, "timesteps_total": 170400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 170400, "hostname": "cda-server-3", "episode_reward_max": -48.129169098879075}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 35289.0909883976, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -72.46281944527702, "iterations_since_restore": 143, "episodes_total": 3432, "timestamp": 1756429706, "episode_reward_mean": -55.369224463036765, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_03-08-26", "policy_reward_mean": {}, "time_this_iter_s": 279.6226842403412, "episodes_this_iter": 24, "training_iteration": 143, "time_total_s": 35289.0909883976, "info": {"num_steps_sampled": 171600, "num_steps_trained": 171600, "default": {"policy_loss": -0.1365930438041687, "vf_explained_var": 0.912811279296875, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 13.83117389678955, "entropy": 15.256481170654297, "kl": 0.017149154096841812, "total_loss": 13.711945533752441}, "sample_time_ms": 257528.771, "grad_time_ms": 695.525, "load_time_ms": 1.324, "update_time_ms": 2.547}, "timesteps_total": 171600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 171600, "hostname": "cda-server-3", "episode_reward_max": -48.129169098879075}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 35545.71752953529, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -75.52954077212628, "iterations_since_restore": 144, "episodes_total": 3456, "timestamp": 1756429963, "episode_reward_mean": -55.29145726709157, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_03-12-43", "policy_reward_mean": {}, "time_this_iter_s": 256.6265411376953, "episodes_this_iter": 24, "training_iteration": 144, "time_total_s": 35545.71752953529, "info": {"num_steps_sampled": 172800, "num_steps_trained": 172800, "default": {"policy_loss": -0.14175564050674438, "vf_explained_var": 0.9073739647865295, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 15.246339797973633, "entropy": 15.155366897583008, "kl": 0.015908382833003998, "total_loss": 15.120692253112793}, "sample_time_ms": 256586.634, "grad_time_ms": 694.93, "load_time_ms": 1.436, "update_time_ms": 2.567}, "timesteps_total": 172800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 172800, "hostname": "cda-server-3", "episode_reward_max": -48.129169098879075}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 35821.987554073334, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -75.52954077212628, "iterations_since_restore": 145, "episodes_total": 3480, "timestamp": 1756430239, "episode_reward_mean": -54.90900760315747, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_03-17-19", "policy_reward_mean": {}, "time_this_iter_s": 276.27002453804016, "episodes_this_iter": 24, "training_iteration": 145, "time_total_s": 35821.987554073334, "info": {"num_steps_sampled": 174000, "num_steps_trained": 174000, "default": {"policy_loss": -0.1382271647453308, "vf_explained_var": 0.9090858101844788, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 13.077757835388184, "entropy": 15.22294807434082, "kl": 0.017670560628175735, "total_loss": 12.95742130279541}, "sample_time_ms": 258511.631, "grad_time_ms": 694.44, "load_time_ms": 1.449, "update_time_ms": 2.561}, "timesteps_total": 174000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 174000, "hostname": "cda-server-3", "episode_reward_max": -48.129169098879075}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 36097.462760448456, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -79.94967820860617, "iterations_since_restore": 146, "episodes_total": 3504, "timestamp": 1756430515, "episode_reward_mean": -55.34494379447178, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_03-21-55", "policy_reward_mean": {}, "time_this_iter_s": 275.47520637512207, "episodes_this_iter": 24, "training_iteration": 146, "time_total_s": 36097.462760448456, "info": {"num_steps_sampled": 175200, "num_steps_trained": 175200, "default": {"policy_loss": -0.13017256557941437, "vf_explained_var": 0.8144359588623047, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 34.2513542175293, "entropy": 15.095757484436035, "kl": 0.014651145786046982, "total_loss": 34.136016845703125}, "sample_time_ms": 259405.121, "grad_time_ms": 694.972, "load_time_ms": 1.499, "update_time_ms": 2.528}, "timesteps_total": 175200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 175200, "hostname": "cda-server-3", "episode_reward_max": -49.86107777805505}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 36382.53430700302, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -79.94967820860617, "iterations_since_restore": 147, "episodes_total": 3528, "timestamp": 1756430800, "episode_reward_mean": -55.20145012855225, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_03-26-40", "policy_reward_mean": {}, "time_this_iter_s": 285.07154655456543, "episodes_this_iter": 24, "training_iteration": 147, "time_total_s": 36382.53430700302, "info": {"num_steps_sampled": 176400, "num_steps_trained": 176400, "default": {"policy_loss": -0.14096269011497498, "vf_explained_var": 0.9250853657722473, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 10.303423881530762, "entropy": 15.09123420715332, "kl": 0.01631304621696472, "total_loss": 10.178977966308594}, "sample_time_ms": 264107.325, "grad_time_ms": 695.414, "load_time_ms": 1.51, "update_time_ms": 2.538}, "timesteps_total": 176400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 176400, "hostname": "cda-server-3", "episode_reward_max": -49.86107777805505}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 36627.90810227394, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -79.94967820860617, "iterations_since_restore": 148, "episodes_total": 3552, "timestamp": 1756431045, "episode_reward_mean": -55.17519078805936, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_03-30-45", "policy_reward_mean": {}, "time_this_iter_s": 245.3737952709198, "episodes_this_iter": 24, "training_iteration": 148, "time_total_s": 36627.90810227394, "info": {"num_steps_sampled": 177600, "num_steps_trained": 177600, "default": {"policy_loss": -0.13445059955120087, "vf_explained_var": 0.8926759362220764, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 16.581031799316406, "entropy": 14.917000770568848, "kl": 0.017347920686006546, "total_loss": 16.46414566040039}, "sample_time_ms": 263589.876, "grad_time_ms": 695.333, "load_time_ms": 1.522, "update_time_ms": 2.577}, "timesteps_total": 177600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 177600, "hostname": "cda-server-3", "episode_reward_max": -49.868294210863574}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 36868.36815214157, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -83.15647612467512, "iterations_since_restore": 149, "episodes_total": 3576, "timestamp": 1756431286, "episode_reward_mean": -55.496444152964315, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_03-34-46", "policy_reward_mean": {}, "time_this_iter_s": 240.46004986763, "episodes_this_iter": 24, "training_iteration": 149, "time_total_s": 36868.36815214157, "info": {"num_steps_sampled": 178800, "num_steps_trained": 178800, "default": {"policy_loss": -0.1339377909898758, "vf_explained_var": 0.8692839741706848, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 22.630189895629883, "entropy": 14.89309310913086, "kl": 0.014757196418941021, "total_loss": 22.511194229125977}, "sample_time_ms": 264588.746, "grad_time_ms": 695.936, "load_time_ms": 1.571, "update_time_ms": 2.617}, "timesteps_total": 178800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 178800, "hostname": "cda-server-3", "episode_reward_max": -49.868294210863574}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 37109.659499168396, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -83.15647612467512, "iterations_since_restore": 150, "episodes_total": 3600, "timestamp": 1756431527, "episode_reward_mean": -54.72428538909617, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_03-38-47", "policy_reward_mean": {}, "time_this_iter_s": 241.29134702682495, "episodes_this_iter": 24, "training_iteration": 150, "time_total_s": 37109.659499168396, "info": {"num_steps_sampled": 180000, "num_steps_trained": 180000, "default": {"policy_loss": -0.12940925359725952, "vf_explained_var": 0.9131262302398682, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 14.03437614440918, "entropy": 14.959555625915527, "kl": 0.016797177493572235, "total_loss": 13.92197322845459}, "sample_time_ms": 265853.553, "grad_time_ms": 696.881, "load_time_ms": 1.582, "update_time_ms": 2.599}, "timesteps_total": 180000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 180000, "hostname": "cda-server-3", "episode_reward_max": -50.103271334104306}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 37372.81824541092, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -83.15647612467512, "iterations_since_restore": 151, "episodes_total": 3624, "timestamp": 1756431790, "episode_reward_mean": -54.95631083997977, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_03-43-10", "policy_reward_mean": {}, "time_this_iter_s": 263.1587462425232, "episodes_this_iter": 24, "training_iteration": 151, "time_total_s": 37372.81824541092, "info": {"num_steps_sampled": 181200, "num_steps_trained": 181200, "default": {"policy_loss": -0.13383673131465912, "vf_explained_var": 0.9046041965484619, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 14.824883460998535, "entropy": 15.047348022460938, "kl": 0.015503380447626114, "total_loss": 14.706741333007812}, "sample_time_ms": 263094.858, "grad_time_ms": 697.053, "load_time_ms": 1.576, "update_time_ms": 2.585}, "timesteps_total": 181200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 181200, "hostname": "cda-server-3", "episode_reward_max": -50.103271334104306}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 37592.00878381729, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -83.15647612467512, "iterations_since_restore": 152, "episodes_total": 3648, "timestamp": 1756432009, "episode_reward_mean": -55.042814484307165, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_03-46-49", "policy_reward_mean": {}, "time_this_iter_s": 219.19053840637207, "episodes_this_iter": 24, "training_iteration": 152, "time_total_s": 37592.00878381729, "info": {"num_steps_sampled": 182400, "num_steps_trained": 182400, "default": {"policy_loss": -0.14624041318893433, "vf_explained_var": 0.8944531679153442, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 18.525949478149414, "entropy": 15.080928802490234, "kl": 0.017322639003396034, "total_loss": 18.397249221801758}, "sample_time_ms": 257547.52, "grad_time_ms": 697.159, "load_time_ms": 1.677, "update_time_ms": 2.615}, "timesteps_total": 182400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 182400, "hostname": "cda-server-3", "episode_reward_max": -50.103271334104306}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 37824.8251748085, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -76.25276652916195, "iterations_since_restore": 153, "episodes_total": 3672, "timestamp": 1756432242, "episode_reward_mean": -54.80760574942528, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_03-50-42", "policy_reward_mean": {}, "time_this_iter_s": 232.81639099121094, "episodes_this_iter": 24, "training_iteration": 153, "time_total_s": 37824.8251748085, "info": {"num_steps_sampled": 183600, "num_steps_trained": 183600, "default": {"policy_loss": -0.12011555582284927, "vf_explained_var": 0.9176934361457825, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 13.568841934204102, "entropy": 14.749469757080078, "kl": 0.015085036866366863, "total_loss": 13.464000701904297}, "sample_time_ms": 252867.137, "grad_time_ms": 696.913, "load_time_ms": 1.679, "update_time_ms": 2.613}, "timesteps_total": 183600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 183600, "hostname": "cda-server-3", "episode_reward_max": -51.635439929331795}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 38087.21182632446, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -76.25276652916195, "iterations_since_restore": 154, "episodes_total": 3696, "timestamp": 1756432505, "episode_reward_mean": -54.710018271540406, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_03-55-05", "policy_reward_mean": {}, "time_this_iter_s": 262.3866515159607, "episodes_this_iter": 24, "training_iteration": 154, "time_total_s": 38087.21182632446, "info": {"num_steps_sampled": 184800, "num_steps_trained": 184800, "default": {"policy_loss": -0.11606475710868835, "vf_explained_var": 0.9142285585403442, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 11.576321601867676, "entropy": 14.832953453063965, "kl": 0.01606798730790615, "total_loss": 11.476527214050293}, "sample_time_ms": 253442.364, "grad_time_ms": 697.813, "load_time_ms": 1.578, "update_time_ms": 2.592}, "timesteps_total": 184800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 184800, "hostname": "cda-server-3", "episode_reward_max": -51.69439838421866}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 38328.99079108238, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -71.88802226923642, "iterations_since_restore": 155, "episodes_total": 3720, "timestamp": 1756432746, "episode_reward_mean": -54.24950328876382, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_03-59-06", "policy_reward_mean": {}, "time_this_iter_s": 241.7789647579193, "episodes_this_iter": 24, "training_iteration": 155, "time_total_s": 38328.99079108238, "info": {"num_steps_sampled": 186000, "num_steps_trained": 186000, "default": {"policy_loss": -0.12019169330596924, "vf_explained_var": 0.9199265241622925, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 10.817547798156738, "entropy": 14.937190055847168, "kl": 0.0172748900949955, "total_loss": 10.714847564697266}, "sample_time_ms": 249993.188, "grad_time_ms": 697.826, "load_time_ms": 1.591, "update_time_ms": 2.591}, "timesteps_total": 186000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 186000, "hostname": "cda-server-3", "episode_reward_max": -51.69439838421866}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 38605.58489322662, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -60.94899705446273, "iterations_since_restore": 156, "episodes_total": 3744, "timestamp": 1756433023, "episode_reward_mean": -53.82121373845912, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_04-03-43", "policy_reward_mean": {}, "time_this_iter_s": 276.59410214424133, "episodes_this_iter": 24, "training_iteration": 156, "time_total_s": 38605.58489322662, "info": {"num_steps_sampled": 187200, "num_steps_trained": 187200, "default": {"policy_loss": -0.11848673224449158, "vf_explained_var": 0.9233921766281128, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 11.039652824401855, "entropy": 14.721104621887207, "kl": 0.016296055167913437, "total_loss": 10.937665939331055}, "sample_time_ms": 250104.087, "grad_time_ms": 698.824, "load_time_ms": 1.582, "update_time_ms": 2.591}, "timesteps_total": 187200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 187200, "hostname": "cda-server-3", "episode_reward_max": -51.69439838421866}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 38854.769364118576, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -87.87251747175968, "iterations_since_restore": 157, "episodes_total": 3768, "timestamp": 1756433272, "episode_reward_mean": -54.420660136849435, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_04-07-52", "policy_reward_mean": {}, "time_this_iter_s": 249.18447089195251, "episodes_this_iter": 24, "training_iteration": 157, "time_total_s": 38854.769364118576, "info": {"num_steps_sampled": 188400, "num_steps_trained": 188400, "default": {"policy_loss": -0.11602246761322021, "vf_explained_var": 0.8534746766090393, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 34.05124282836914, "entropy": 14.746952056884766, "kl": 0.014781979843974113, "total_loss": 33.9501838684082}, "sample_time_ms": 246516.264, "grad_time_ms": 697.965, "load_time_ms": 1.588, "update_time_ms": 2.589}, "timesteps_total": 188400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 188400, "hostname": "cda-server-3", "episode_reward_max": -51.19677159146877}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 39079.19603562355, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -87.87251747175968, "iterations_since_restore": 158, "episodes_total": 3792, "timestamp": 1756433497, "episode_reward_mean": -54.49077811088377, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_04-11-37", "policy_reward_mean": {}, "time_this_iter_s": 224.42667150497437, "episodes_this_iter": 24, "training_iteration": 158, "time_total_s": 39079.19603562355, "info": {"num_steps_sampled": 189600, "num_steps_trained": 189600, "default": {"policy_loss": -0.1355181485414505, "vf_explained_var": 0.9379551410675049, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 9.295769691467285, "entropy": 14.72548770904541, "kl": 0.015183514915406704, "total_loss": 9.17562484741211}, "sample_time_ms": 244421.345, "grad_time_ms": 698.161, "load_time_ms": 1.564, "update_time_ms": 2.575}, "timesteps_total": 189600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 189600, "hostname": "cda-server-3", "episode_reward_max": -51.19677159146877}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 39353.902054309845, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -87.87251747175968, "iterations_since_restore": 159, "episodes_total": 3816, "timestamp": 1756433771, "episode_reward_mean": -54.94807630013864, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_04-16-11", "policy_reward_mean": {}, "time_this_iter_s": 274.70601868629456, "episodes_this_iter": 24, "training_iteration": 159, "time_total_s": 39353.902054309845, "info": {"num_steps_sampled": 190800, "num_steps_trained": 190800, "default": {"policy_loss": -0.13363111019134521, "vf_explained_var": 0.8988499045372009, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 15.78367805480957, "entropy": 14.716657638549805, "kl": 0.015655651688575745, "total_loss": 15.665897369384766}, "sample_time_ms": 247846.641, "grad_time_ms": 697.463, "load_time_ms": 1.576, "update_time_ms": 2.555}, "timesteps_total": 190800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 190800, "hostname": "cda-server-3", "episode_reward_max": -51.19677159146877}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 39582.731301784515, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -87.87251747175968, "iterations_since_restore": 160, "episodes_total": 3840, "timestamp": 1756434000, "episode_reward_mean": -54.99390824289015, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_04-20-00", "policy_reward_mean": {}, "time_this_iter_s": 228.8292474746704, "episodes_this_iter": 24, "training_iteration": 160, "time_total_s": 39582.731301784515, "info": {"num_steps_sampled": 192000, "num_steps_trained": 192000, "default": {"policy_loss": -0.13071568310260773, "vf_explained_var": 0.8984204530715942, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 14.689178466796875, "entropy": 14.589058876037598, "kl": 0.016704510897397995, "total_loss": 14.575374603271484}, "sample_time_ms": 246600.492, "grad_time_ms": 697.379, "load_time_ms": 1.582, "update_time_ms": 2.557}, "timesteps_total": 192000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 192000, "hostname": "cda-server-3", "episode_reward_max": -51.07453569163501}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 39832.147840976715, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -72.07512178954435, "iterations_since_restore": 161, "episodes_total": 3864, "timestamp": 1756434250, "episode_reward_mean": -54.470317514482815, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_04-24-10", "policy_reward_mean": {}, "time_this_iter_s": 249.4165391921997, "episodes_this_iter": 24, "training_iteration": 161, "time_total_s": 39832.147840976715, "info": {"num_steps_sampled": 193200, "num_steps_trained": 193200, "default": {"policy_loss": -0.12139880657196045, "vf_explained_var": 0.9051095247268677, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 13.678767204284668, "entropy": 14.655537605285645, "kl": 0.015876276418566704, "total_loss": 13.573442459106445}, "sample_time_ms": 245226.549, "grad_time_ms": 697.164, "load_time_ms": 1.581, "update_time_ms": 2.564}, "timesteps_total": 193200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 193200, "hostname": "cda-server-3", "episode_reward_max": -49.92447552813607}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 40083.27506804466, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -72.07512178954435, "iterations_since_restore": 162, "episodes_total": 3888, "timestamp": 1756434501, "episode_reward_mean": -54.33385886284182, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_04-28-21", "policy_reward_mean": {}, "time_this_iter_s": 251.1272270679474, "episodes_this_iter": 24, "training_iteration": 162, "time_total_s": 40083.27506804466, "info": {"num_steps_sampled": 194400, "num_steps_trained": 194400, "default": {"policy_loss": -0.11639168858528137, "vf_explained_var": 0.8642103672027588, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 18.328638076782227, "entropy": 14.6741943359375, "kl": 0.016865216195583344, "total_loss": 18.22932243347168}, "sample_time_ms": 248419.243, "grad_time_ms": 698.138, "load_time_ms": 1.585, "update_time_ms": 2.543}, "timesteps_total": 194400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 194400, "hostname": "cda-server-3", "episode_reward_max": -49.92447552813607}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 40346.9060986042, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -65.13800180278425, "iterations_since_restore": 163, "episodes_total": 3912, "timestamp": 1756434764, "episode_reward_mean": -53.99017250932294, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_04-32-44", "policy_reward_mean": {}, "time_this_iter_s": 263.6310305595398, "episodes_this_iter": 24, "training_iteration": 163, "time_total_s": 40346.9060986042, "info": {"num_steps_sampled": 195600, "num_steps_trained": 195600, "default": {"policy_loss": -0.0998261496424675, "vf_explained_var": 0.8992434740066528, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 13.860114097595215, "entropy": 14.415943145751953, "kl": 0.016176464036107063, "total_loss": 13.776667594909668}, "sample_time_ms": 251501.186, "grad_time_ms": 697.607, "load_time_ms": 1.609, "update_time_ms": 2.552}, "timesteps_total": 195600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 195600, "hostname": "cda-server-3", "episode_reward_max": -49.92447552813607}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 40603.62238764763, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -65.93216349559958, "iterations_since_restore": 164, "episodes_total": 3936, "timestamp": 1756435021, "episode_reward_mean": -53.87998544779606, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_04-37-01", "policy_reward_mean": {}, "time_this_iter_s": 256.7162890434265, "episodes_this_iter": 24, "training_iteration": 164, "time_total_s": 40603.62238764763, "info": {"num_steps_sampled": 196800, "num_steps_trained": 196800, "default": {"policy_loss": -0.14119286835193634, "vf_explained_var": 0.8982493281364441, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 14.692657470703125, "entropy": 14.567020416259766, "kl": 0.01636369712650776, "total_loss": 14.568032264709473}, "sample_time_ms": 250935.353, "grad_time_ms": 696.368, "load_time_ms": 1.616, "update_time_ms": 2.555}, "timesteps_total": 196800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 196800, "hostname": "cda-server-3", "episode_reward_max": -49.92447552813607}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 40852.17313194275, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -65.93216349559958, "iterations_since_restore": 165, "episodes_total": 3960, "timestamp": 1756435270, "episode_reward_mean": -53.61150029783123, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_04-41-10", "policy_reward_mean": {}, "time_this_iter_s": 248.55074429512024, "episodes_this_iter": 24, "training_iteration": 165, "time_total_s": 40852.17313194275, "info": {"num_steps_sampled": 198000, "num_steps_trained": 198000, "default": {"policy_loss": -0.1300608515739441, "vf_explained_var": 0.9516932368278503, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 6.523504257202148, "entropy": 14.476093292236328, "kl": 0.017224567010998726, "total_loss": 6.410882949829102}, "sample_time_ms": 251613.146, "grad_time_ms": 695.773, "load_time_ms": 1.634, "update_time_ms": 2.543}, "timesteps_total": 198000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 198000, "hostname": "cda-server-3", "episode_reward_max": -50.33426657153577}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 41124.5479888916, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -65.93216349559958, "iterations_since_restore": 166, "episodes_total": 3984, "timestamp": 1756435542, "episode_reward_mean": -53.50584114911244, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_04-45-42", "policy_reward_mean": {}, "time_this_iter_s": 272.37485694885254, "episodes_this_iter": 24, "training_iteration": 166, "time_total_s": 41124.5479888916, "info": {"num_steps_sampled": 199200, "num_steps_trained": 199200, "default": {"policy_loss": -0.13436605036258698, "vf_explained_var": 0.9507731199264526, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 6.58724308013916, "entropy": 14.292543411254883, "kl": 0.016184302046895027, "total_loss": 6.469264030456543}, "sample_time_ms": 251192.499, "grad_time_ms": 694.461, "load_time_ms": 1.646, "update_time_ms": 2.536}, "timesteps_total": 199200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 199200, "hostname": "cda-server-3", "episode_reward_max": -50.143069802916855}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 41352.61390995979, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -65.93216349559958, "iterations_since_restore": 167, "episodes_total": 4008, "timestamp": 1756435770, "episode_reward_mean": -53.56339851585321, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_04-49-30", "policy_reward_mean": {}, "time_this_iter_s": 228.06592106819153, "episodes_this_iter": 24, "training_iteration": 167, "time_total_s": 41352.61390995979, "info": {"num_steps_sampled": 200400, "num_steps_trained": 200400, "default": {"policy_loss": -0.1314341276884079, "vf_explained_var": 0.9463107585906982, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 7.555251121520996, "entropy": 14.319255828857422, "kl": 0.016974905505776405, "total_loss": 7.441004276275635}, "sample_time_ms": 249078.991, "grad_time_ms": 696.199, "load_time_ms": 1.574, "update_time_ms": 2.54}, "timesteps_total": 200400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 200400, "hostname": "cda-server-3", "episode_reward_max": -50.143069802916855}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 41594.20011138916, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -61.94201876237128, "iterations_since_restore": 168, "episodes_total": 4032, "timestamp": 1756436012, "episode_reward_mean": -53.30442686539963, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_04-53-32", "policy_reward_mean": {}, "time_this_iter_s": 241.58620142936707, "episodes_this_iter": 24, "training_iteration": 168, "time_total_s": 41594.20011138916, "info": {"num_steps_sampled": 201600, "num_steps_trained": 201600, "default": {"policy_loss": -0.12491725385189056, "vf_explained_var": 0.9548305869102478, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 6.068361759185791, "entropy": 14.498003005981445, "kl": 0.017109356820583344, "total_loss": 5.96076774597168}, "sample_time_ms": 250794.892, "grad_time_ms": 696.286, "load_time_ms": 1.578, "update_time_ms": 2.534}, "timesteps_total": 201600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 201600, "hostname": "cda-server-3", "episode_reward_max": -50.01706107894995}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 41838.673221588135, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -70.72055208052299, "iterations_since_restore": 169, "episodes_total": 4056, "timestamp": 1756436256, "episode_reward_mean": -53.589283976993016, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_04-57-36", "policy_reward_mean": {}, "time_this_iter_s": 244.4731101989746, "episodes_this_iter": 24, "training_iteration": 169, "time_total_s": 41838.673221588135, "info": {"num_steps_sampled": 202800, "num_steps_trained": 202800, "default": {"policy_loss": -0.12954078614711761, "vf_explained_var": 0.9378258585929871, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 9.549816131591797, "entropy": 14.280044555664062, "kl": 0.016108253970742226, "total_loss": 9.43658447265625}, "sample_time_ms": 247771.674, "grad_time_ms": 696.205, "load_time_ms": 1.54, "update_time_ms": 2.535}, "timesteps_total": 202800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 202800, "hostname": "cda-server-3", "episode_reward_max": -50.01706107894995}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 42089.681601285934, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -70.72055208052299, "iterations_since_restore": 170, "episodes_total": 4080, "timestamp": 1756436507, "episode_reward_mean": -53.57962096219589, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_05-01-47", "policy_reward_mean": {}, "time_this_iter_s": 251.00837969779968, "episodes_this_iter": 24, "training_iteration": 170, "time_total_s": 42089.681601285934, "info": {"num_steps_sampled": 204000, "num_steps_trained": 204000, "default": {"policy_loss": -0.12321165949106216, "vf_explained_var": 0.9296780824661255, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 10.322514533996582, "entropy": 14.262398719787598, "kl": 0.014291416853666306, "total_loss": 10.21377182006836}, "sample_time_ms": 249990.139, "grad_time_ms": 695.714, "load_time_ms": 1.488, "update_time_ms": 2.565}, "timesteps_total": 204000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 204000, "hostname": "cda-server-3", "episode_reward_max": -49.0508869398342}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 42328.8942193985, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -70.72055208052299, "iterations_since_restore": 171, "episodes_total": 4104, "timestamp": 1756436747, "episode_reward_mean": -53.66866427174036, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_05-05-47", "policy_reward_mean": {}, "time_this_iter_s": 239.2126181125641, "episodes_this_iter": 24, "training_iteration": 171, "time_total_s": 42328.8942193985, "info": {"num_steps_sampled": 205200, "num_steps_trained": 205200, "default": {"policy_loss": -0.12857241928577423, "vf_explained_var": 0.9397585988044739, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 8.941105842590332, "entropy": 14.35903263092041, "kl": 0.016312314197421074, "total_loss": 8.82905101776123}, "sample_time_ms": 248969.415, "grad_time_ms": 696.245, "load_time_ms": 1.408, "update_time_ms": 2.521}, "timesteps_total": 205200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 205200, "hostname": "cda-server-3", "episode_reward_max": -49.0508869398342}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 42626.244643211365, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -70.72055208052299, "iterations_since_restore": 172, "episodes_total": 4128, "timestamp": 1756437044, "episode_reward_mean": -53.580794914051395, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_05-10-44", "policy_reward_mean": {}, "time_this_iter_s": 297.3504238128662, "episodes_this_iter": 24, "training_iteration": 172, "time_total_s": 42626.244643211365, "info": {"num_steps_sampled": 206400, "num_steps_trained": 206400, "default": {"policy_loss": -0.12440269440412521, "vf_explained_var": 0.9441279172897339, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 7.573556900024414, "entropy": 14.359490394592285, "kl": 0.016533873975276947, "total_loss": 7.46589469909668}, "sample_time_ms": 253592.02, "grad_time_ms": 695.983, "load_time_ms": 1.393, "update_time_ms": 2.517}, "timesteps_total": 206400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 206400, "hostname": "cda-server-3", "episode_reward_max": -49.0508869398342}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 42860.58568787575, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -70.72055208052299, "iterations_since_restore": 173, "episodes_total": 4152, "timestamp": 1756437278, "episode_reward_mean": -53.7161237568239, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_05-14-38", "policy_reward_mean": {}, "time_this_iter_s": 234.34104466438293, "episodes_this_iter": 24, "training_iteration": 173, "time_total_s": 42860.58568787575, "info": {"num_steps_sampled": 207600, "num_steps_trained": 207600, "default": {"policy_loss": -0.12295085936784744, "vf_explained_var": 0.9145262837409973, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 12.39100170135498, "entropy": 14.055234909057617, "kl": 0.016166819259524345, "total_loss": 12.284420013427734}, "sample_time_ms": 250662.754, "grad_time_ms": 696.223, "load_time_ms": 1.405, "update_time_ms": 2.533}, "timesteps_total": 207600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 207600, "hostname": "cda-server-3", "episode_reward_max": -49.0508869398342}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 43113.22520804405, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -86.90779398729012, "iterations_since_restore": 174, "episodes_total": 4176, "timestamp": 1756437531, "episode_reward_mean": -53.890788490715124, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_05-18-51", "policy_reward_mean": {}, "time_this_iter_s": 252.63952016830444, "episodes_this_iter": 24, "training_iteration": 174, "time_total_s": 43113.22520804405, "info": {"num_steps_sampled": 208800, "num_steps_trained": 208800, "default": {"policy_loss": -0.13327403366565704, "vf_explained_var": 0.9170873165130615, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 14.936214447021484, "entropy": 14.28572940826416, "kl": 0.014670169912278652, "total_loss": 14.817794799804688}, "sample_time_ms": 250254.236, "grad_time_ms": 697.167, "load_time_ms": 1.331, "update_time_ms": 2.562}, "timesteps_total": 208800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 208800, "hostname": "cda-server-3", "episode_reward_max": -50.13486725085076}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 43354.569568157196, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -86.90779398729012, "iterations_since_restore": 175, "episodes_total": 4200, "timestamp": 1756437772, "episode_reward_mean": -53.70484142252989, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_05-22-52", "policy_reward_mean": {}, "time_this_iter_s": 241.34436011314392, "episodes_this_iter": 24, "training_iteration": 175, "time_total_s": 43354.569568157196, "info": {"num_steps_sampled": 210000, "num_steps_trained": 210000, "default": {"policy_loss": -0.1241101399064064, "vf_explained_var": 0.928949773311615, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 10.733738899230957, "entropy": 14.182540893554688, "kl": 0.01592331938445568, "total_loss": 10.625751495361328}, "sample_time_ms": 249532.855, "grad_time_ms": 697.821, "load_time_ms": 1.38, "update_time_ms": 2.569}, "timesteps_total": 210000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 210000, "hostname": "cda-server-3", "episode_reward_max": -50.13486725085076}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 43621.62365627289, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -86.90779398729012, "iterations_since_restore": 176, "episodes_total": 4224, "timestamp": 1756438039, "episode_reward_mean": -53.60701516354529, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_05-27-19", "policy_reward_mean": {}, "time_this_iter_s": 267.05408811569214, "episodes_this_iter": 24, "training_iteration": 176, "time_total_s": 43621.62365627289, "info": {"num_steps_sampled": 211200, "num_steps_trained": 211200, "default": {"policy_loss": -0.12065468728542328, "vf_explained_var": 0.9261561632156372, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 10.087909698486328, "entropy": 13.944937705993652, "kl": 0.015050739049911499, "total_loss": 9.98249340057373}, "sample_time_ms": 249000.493, "grad_time_ms": 698.123, "load_time_ms": 1.361, "update_time_ms": 2.578}, "timesteps_total": 211200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 211200, "hostname": "cda-server-3", "episode_reward_max": -50.13486725085076}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 43901.720831632614, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -86.90779398729012, "iterations_since_restore": 177, "episodes_total": 4248, "timestamp": 1756438319, "episode_reward_mean": -53.543342405927405, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_05-31-59", "policy_reward_mean": {}, "time_this_iter_s": 280.09717535972595, "episodes_this_iter": 24, "training_iteration": 177, "time_total_s": 43901.720831632614, "info": {"num_steps_sampled": 212400, "num_steps_trained": 212400, "default": {"policy_loss": -0.1298007220029831, "vf_explained_var": 0.9530531764030457, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 6.61334228515625, "entropy": 14.069295883178711, "kl": 0.01683618873357773, "total_loss": 6.500588417053223}, "sample_time_ms": 254205.562, "grad_time_ms": 696.216, "load_time_ms": 1.348, "update_time_ms": 2.603}, "timesteps_total": 212400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 212400, "hostname": "cda-server-3", "episode_reward_max": -50.69632375703871}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 44153.83974337578, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -66.96402946455778, "iterations_since_restore": 178, "episodes_total": 4272, "timestamp": 1756438572, "episode_reward_mean": -53.031808792535166, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_05-36-12", "policy_reward_mean": {}, "time_this_iter_s": 252.11891174316406, "episodes_this_iter": 24, "training_iteration": 178, "time_total_s": 44153.83974337578, "info": {"num_steps_sampled": 213600, "num_steps_trained": 213600, "default": {"policy_loss": -0.130199134349823, "vf_explained_var": 0.9504425525665283, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 6.463962078094482, "entropy": 13.97944164276123, "kl": 0.01592904143035412, "total_loss": 6.349891185760498}, "sample_time_ms": 255259.466, "grad_time_ms": 695.531, "load_time_ms": 1.36, "update_time_ms": 2.621}, "timesteps_total": 213600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 213600, "hostname": "cda-server-3", "episode_reward_max": -49.36445515990393}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 44386.934242248535, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -66.96402946455778, "iterations_since_restore": 179, "episodes_total": 4296, "timestamp": 1756438805, "episode_reward_mean": -53.042538560292826, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_05-40-05", "policy_reward_mean": {}, "time_this_iter_s": 233.09449887275696, "episodes_this_iter": 24, "training_iteration": 179, "time_total_s": 44386.934242248535, "info": {"num_steps_sampled": 214800, "num_steps_trained": 214800, "default": {"policy_loss": -0.13994605839252472, "vf_explained_var": 0.9725171327590942, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 3.6190366744995117, "entropy": 13.99028205871582, "kl": 0.01614346355199814, "total_loss": 3.495435953140259}, "sample_time_ms": 254121.206, "grad_time_ms": 695.892, "load_time_ms": 1.397, "update_time_ms": 2.636}, "timesteps_total": 214800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 214800, "hostname": "cda-server-3", "episode_reward_max": -49.36445515990393}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 44641.26664805412, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -66.96402946455778, "iterations_since_restore": 180, "episodes_total": 4320, "timestamp": 1756439059, "episode_reward_mean": -53.1373632716962, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_05-44-19", "policy_reward_mean": {}, "time_this_iter_s": 254.33240580558777, "episodes_this_iter": 24, "training_iteration": 180, "time_total_s": 44641.26664805412, "info": {"num_steps_sampled": 216000, "num_steps_trained": 216000, "default": {"policy_loss": -0.1175423189997673, "vf_explained_var": 0.9335753917694092, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 9.291361808776855, "entropy": 13.877095222473145, "kl": 0.015891285613179207, "total_loss": 9.189908981323242}, "sample_time_ms": 254453.815, "grad_time_ms": 695.543, "load_time_ms": 1.494, "update_time_ms": 2.601}, "timesteps_total": 216000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 216000, "hostname": "cda-server-3", "episode_reward_max": -49.36445515990393}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 44861.4182267189, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -94.56750234999927, "iterations_since_restore": 181, "episodes_total": 4344, "timestamp": 1756439279, "episode_reward_mean": -53.63649838877152, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_05-47-59", "policy_reward_mean": {}, "time_this_iter_s": 220.15157866477966, "episodes_this_iter": 24, "training_iteration": 181, "time_total_s": 44861.4182267189, "info": {"num_steps_sampled": 217200, "num_steps_trained": 217200, "default": {"policy_loss": -0.11929008364677429, "vf_explained_var": 0.9169760942459106, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 14.88999080657959, "entropy": 13.808137893676758, "kl": 0.013675041496753693, "total_loss": 14.7845458984375}, "sample_time_ms": 252547.651, "grad_time_ms": 695.443, "load_time_ms": 1.57, "update_time_ms": 2.619}, "timesteps_total": 217200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 217200, "hostname": "cda-server-3", "episode_reward_max": -49.36445515990393}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 45094.17157244682, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -94.56750234999927, "iterations_since_restore": 182, "episodes_total": 4368, "timestamp": 1756439512, "episode_reward_mean": -53.8451041786324, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_05-51-52", "policy_reward_mean": {}, "time_this_iter_s": 232.75334572792053, "episodes_this_iter": 24, "training_iteration": 182, "time_total_s": 45094.17157244682, "info": {"num_steps_sampled": 218400, "num_steps_trained": 218400, "default": {"policy_loss": -0.1277659684419632, "vf_explained_var": 0.9497382044792175, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 7.0899529457092285, "entropy": 13.709293365478516, "kl": 0.015457798726856709, "total_loss": 6.977838516235352}, "sample_time_ms": 246088.625, "grad_time_ms": 694.91, "load_time_ms": 1.472, "update_time_ms": 2.614}, "timesteps_total": 218400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 218400, "hostname": "cda-server-3", "episode_reward_max": -49.08233276373182}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 45367.272315979004, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -94.56750234999927, "iterations_since_restore": 183, "episodes_total": 4392, "timestamp": 1756439785, "episode_reward_mean": -54.12122982188653, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_05-56-25", "policy_reward_mean": {}, "time_this_iter_s": 273.1007435321808, "episodes_this_iter": 24, "training_iteration": 183, "time_total_s": 45367.272315979004, "info": {"num_steps_sampled": 219600, "num_steps_trained": 219600, "default": {"policy_loss": -0.11586789041757584, "vf_explained_var": 0.8821346759796143, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 20.151500701904297, "entropy": 13.90664291381836, "kl": 0.012427722103893757, "total_loss": 20.048213958740234}, "sample_time_ms": 249965.472, "grad_time_ms": 694.231, "load_time_ms": 1.356, "update_time_ms": 2.572}, "timesteps_total": 219600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 219600, "hostname": "cda-server-3", "episode_reward_max": -49.08233276373182}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 45608.537001371384, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -94.56750234999927, "iterations_since_restore": 184, "episodes_total": 4416, "timestamp": 1756440026, "episode_reward_mean": -54.261823213783686, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_06-00-26", "policy_reward_mean": {}, "time_this_iter_s": 241.26468539237976, "episodes_this_iter": 24, "training_iteration": 184, "time_total_s": 45608.537001371384, "info": {"num_steps_sampled": 220800, "num_steps_trained": 220800, "default": {"policy_loss": -0.1134781688451767, "vf_explained_var": 0.9582895636558533, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 6.243759632110596, "entropy": 13.678974151611328, "kl": 0.013173202984035015, "total_loss": 6.143619060516357}, "sample_time_ms": 248827.732, "grad_time_ms": 694.495, "load_time_ms": 1.368, "update_time_ms": 2.567}, "timesteps_total": 220800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 220800, "hostname": "cda-server-3", "episode_reward_max": -49.08233276373182}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 45841.35560679436, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -80.45771722108525, "iterations_since_restore": 185, "episodes_total": 4440, "timestamp": 1756440259, "episode_reward_mean": -53.90100144491721, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_06-04-19", "policy_reward_mean": {}, "time_this_iter_s": 232.81860542297363, "episodes_this_iter": 24, "training_iteration": 185, "time_total_s": 45841.35560679436, "info": {"num_steps_sampled": 222000, "num_steps_trained": 222000, "default": {"policy_loss": -0.12608960270881653, "vf_explained_var": 0.9605620503425598, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 5.352666854858398, "entropy": 13.728774070739746, "kl": 0.016028843820095062, "total_loss": 5.242806911468506}, "sample_time_ms": 247974.951, "grad_time_ms": 694.704, "load_time_ms": 1.354, "update_time_ms": 2.577}, "timesteps_total": 222000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 222000, "hostname": "cda-server-3", "episode_reward_max": -49.08233276373182}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 46075.51358270645, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -103.01053707639123, "iterations_since_restore": 186, "episodes_total": 4464, "timestamp": 1756440493, "episode_reward_mean": -54.77867174185004, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_06-08-13", "policy_reward_mean": {}, "time_this_iter_s": 234.15797591209412, "episodes_this_iter": 24, "training_iteration": 186, "time_total_s": 46075.51358270645, "info": {"num_steps_sampled": 223200, "num_steps_trained": 223200, "default": {"policy_loss": -0.1334741711616516, "vf_explained_var": 0.8342825174331665, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 47.81349563598633, "entropy": 13.694595336914062, "kl": 0.012813089415431023, "total_loss": 47.69300079345703}, "sample_time_ms": 244685.472, "grad_time_ms": 694.641, "load_time_ms": 1.328, "update_time_ms": 2.575}, "timesteps_total": 223200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 223200, "hostname": "cda-server-3", "episode_reward_max": -49.99546774844703}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 46318.27295923233, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -103.01053707639123, "iterations_since_restore": 187, "episodes_total": 4488, "timestamp": 1756440736, "episode_reward_mean": -54.5534802284662, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_06-12-16", "policy_reward_mean": {}, "time_this_iter_s": 242.7593765258789, "episodes_this_iter": 24, "training_iteration": 187, "time_total_s": 46318.27295923233, "info": {"num_steps_sampled": 224400, "num_steps_trained": 224400, "default": {"policy_loss": -0.12978488206863403, "vf_explained_var": 0.9535620212554932, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 6.927732467651367, "entropy": 13.717631340026855, "kl": 0.016308149322867393, "total_loss": 6.814460277557373}, "sample_time_ms": 240951.392, "grad_time_ms": 694.812, "load_time_ms": 1.384, "update_time_ms": 2.555}, "timesteps_total": 224400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 224400, "hostname": "cda-server-3", "episode_reward_max": -49.99546774844703}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 46580.00093770027, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -103.01053707639123, "iterations_since_restore": 188, "episodes_total": 4512, "timestamp": 1756440998, "episode_reward_mean": -54.33199623642102, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_06-16-38", "policy_reward_mean": {}, "time_this_iter_s": 261.7279784679413, "episodes_this_iter": 24, "training_iteration": 188, "time_total_s": 46580.00093770027, "info": {"num_steps_sampled": 225600, "num_steps_trained": 225600, "default": {"policy_loss": -0.14069527387619019, "vf_explained_var": 0.9462458491325378, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 7.247664451599121, "entropy": 13.783607482910156, "kl": 0.017661113291978836, "total_loss": 7.124850749969482}, "sample_time_ms": 241911.574, "grad_time_ms": 695.705, "load_time_ms": 1.292, "update_time_ms": 2.557}, "timesteps_total": 225600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 225600, "hostname": "cda-server-3", "episode_reward_max": -50.05406011084624}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 46799.67392349243, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -103.01053707639123, "iterations_since_restore": 189, "episodes_total": 4536, "timestamp": 1756441217, "episode_reward_mean": -54.52519962590397, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_06-20-17", "policy_reward_mean": {}, "time_this_iter_s": 219.67298579216003, "episodes_this_iter": 24, "training_iteration": 189, "time_total_s": 46799.67392349243, "info": {"num_steps_sampled": 226800, "num_steps_trained": 226800, "default": {"policy_loss": -0.1274680346250534, "vf_explained_var": 0.9378973841667175, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 9.567581176757812, "entropy": 13.739153861999512, "kl": 0.016897717490792274, "total_loss": 9.457221984863281}, "sample_time_ms": 240568.3, "grad_time_ms": 696.809, "load_time_ms": 1.279, "update_time_ms": 2.566}, "timesteps_total": 226800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 226800, "hostname": "cda-server-3", "episode_reward_max": -50.05406011084624}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 47040.63526558876, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -103.01053707639123, "iterations_since_restore": 190, "episodes_total": 4560, "timestamp": 1756441458, "episode_reward_mean": -53.97254179020705, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_06-24-18", "policy_reward_mean": {}, "time_this_iter_s": 240.96134209632874, "episodes_this_iter": 24, "training_iteration": 190, "time_total_s": 47040.63526558876, "info": {"num_steps_sampled": 228000, "num_steps_trained": 228000, "default": {"policy_loss": -0.12781214714050293, "vf_explained_var": 0.9580786824226379, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 6.02968168258667, "entropy": 13.766950607299805, "kl": 0.017274074256420135, "total_loss": 5.9193596839904785}, "sample_time_ms": 239230.328, "grad_time_ms": 697.689, "load_time_ms": 1.28, "update_time_ms": 2.571}, "timesteps_total": 228000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 228000, "hostname": "cda-server-3", "episode_reward_max": -50.59615050914242}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 47280.122878313065, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -101.10936583155627, "iterations_since_restore": 191, "episodes_total": 4584, "timestamp": 1756441698, "episode_reward_mean": -54.279260741314474, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_06-28-18", "policy_reward_mean": {}, "time_this_iter_s": 239.4876127243042, "episodes_this_iter": 24, "training_iteration": 191, "time_total_s": 47280.122878313065, "info": {"num_steps_sampled": 229200, "num_steps_trained": 229200, "default": {"policy_loss": -0.11484278738498688, "vf_explained_var": 0.8638635277748108, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 30.318471908569336, "entropy": 13.744145393371582, "kl": 0.013898147270083427, "total_loss": 30.21769905090332}, "sample_time_ms": 241163.985, "grad_time_ms": 697.753, "load_time_ms": 1.198, "update_time_ms": 2.583}, "timesteps_total": 229200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 229200, "hostname": "cda-server-3", "episode_reward_max": -50.59615050914242}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 47572.10169816017, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -101.10936583155627, "iterations_since_restore": 192, "episodes_total": 4608, "timestamp": 1756441990, "episode_reward_mean": -54.16387812212497, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_06-33-10", "policy_reward_mean": {}, "time_this_iter_s": 291.97881984710693, "episodes_this_iter": 24, "training_iteration": 192, "time_total_s": 47572.10169816017, "info": {"num_steps_sampled": 230400, "num_steps_trained": 230400, "default": {"policy_loss": -0.12756960093975067, "vf_explained_var": 0.9341971278190613, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 9.284131050109863, "entropy": 13.830009460449219, "kl": 0.014379401691257954, "total_loss": 9.171121597290039}, "sample_time_ms": 247086.161, "grad_time_ms": 697.944, "load_time_ms": 1.293, "update_time_ms": 2.578}, "timesteps_total": 230400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 230400, "hostname": "cda-server-3", "episode_reward_max": -50.59615050914242}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 47812.417081832886, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -101.10936583155627, "iterations_since_restore": 193, "episodes_total": 4632, "timestamp": 1756442230, "episode_reward_mean": -53.96007896743721, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_06-37-10", "policy_reward_mean": {}, "time_this_iter_s": 240.31538367271423, "episodes_this_iter": 24, "training_iteration": 193, "time_total_s": 47812.417081832886, "info": {"num_steps_sampled": 231600, "num_steps_trained": 231600, "default": {"policy_loss": -0.12245944887399673, "vf_explained_var": 0.9257941842079163, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 10.380656242370605, "entropy": 13.640023231506348, "kl": 0.015688113868236542, "total_loss": 10.274081230163574}, "sample_time_ms": 243806.656, "grad_time_ms": 698.762, "load_time_ms": 1.389, "update_time_ms": 2.619}, "timesteps_total": 231600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 231600, "hostname": "cda-server-3", "episode_reward_max": -50.59615050914242}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 48095.95903515816, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -101.10936583155627, "iterations_since_restore": 194, "episodes_total": 4656, "timestamp": 1756442514, "episode_reward_mean": -54.306236617855, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_06-41-54", "policy_reward_mean": {}, "time_this_iter_s": 283.5419533252716, "episodes_this_iter": 24, "training_iteration": 194, "time_total_s": 48095.95903515816, "info": {"num_steps_sampled": 232800, "num_steps_trained": 232800, "default": {"policy_loss": -0.1166752278804779, "vf_explained_var": 0.8864515423774719, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 21.87663459777832, "entropy": 13.669998168945312, "kl": 0.013523032888770103, "total_loss": 21.773651123046875}, "sample_time_ms": 248034.822, "grad_time_ms": 698.258, "load_time_ms": 1.439, "update_time_ms": 2.586}, "timesteps_total": 232800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 232800, "hostname": "cda-server-3", "episode_reward_max": -50.90256704987865}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 48327.729848623276, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -89.07687021099098, "iterations_since_restore": 195, "episodes_total": 4680, "timestamp": 1756442746, "episode_reward_mean": -53.548609026782785, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_06-45-46", "policy_reward_mean": {}, "time_this_iter_s": 231.7708134651184, "episodes_this_iter": 24, "training_iteration": 195, "time_total_s": 48327.729848623276, "info": {"num_steps_sampled": 234000, "num_steps_trained": 234000, "default": {"policy_loss": -0.10917246341705322, "vf_explained_var": 0.9413497447967529, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 9.061535835266113, "entropy": 13.525612831115723, "kl": 0.013985957019031048, "total_loss": 8.96652603149414}, "sample_time_ms": 247929.484, "grad_time_ms": 698.848, "load_time_ms": 1.42, "update_time_ms": 2.59}, "timesteps_total": 234000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 234000, "hostname": "cda-server-3", "episode_reward_max": -51.49734124044208}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 48549.923015117645, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -89.07687021099098, "iterations_since_restore": 196, "episodes_total": 4704, "timestamp": 1756442968, "episode_reward_mean": -53.54348130786682, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_06-49-28", "policy_reward_mean": {}, "time_this_iter_s": 222.1931664943695, "episodes_this_iter": 24, "training_iteration": 196, "time_total_s": 48549.923015117645, "info": {"num_steps_sampled": 235200, "num_steps_trained": 235200, "default": {"policy_loss": -0.1291184425354004, "vf_explained_var": 0.949661374092102, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 6.641875267028809, "entropy": 13.663838386535645, "kl": 0.015983549878001213, "total_loss": 6.528940200805664}, "sample_time_ms": 246731.703, "grad_time_ms": 700.173, "load_time_ms": 1.401, "update_time_ms": 2.595}, "timesteps_total": 235200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 235200, "hostname": "cda-server-3", "episode_reward_max": -50.9655152146521}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 48802.114077329636, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -89.07687021099098, "iterations_since_restore": 197, "episodes_total": 4728, "timestamp": 1756443220, "episode_reward_mean": -53.34710076680881, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_06-53-40", "policy_reward_mean": {}, "time_this_iter_s": 252.19106221199036, "episodes_this_iter": 24, "training_iteration": 197, "time_total_s": 48802.114077329636, "info": {"num_steps_sampled": 236400, "num_steps_trained": 236400, "default": {"policy_loss": -0.12881432473659515, "vf_explained_var": 0.9463976621627808, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 7.419828414916992, "entropy": 13.690502166748047, "kl": 0.015713712200522423, "total_loss": 7.306924819946289}, "sample_time_ms": 247675.32, "grad_time_ms": 699.706, "load_time_ms": 1.417, "update_time_ms": 2.593}, "timesteps_total": 236400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 236400, "hostname": "cda-server-3", "episode_reward_max": -49.31600089328854}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 49033.07736849785, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -62.32068669276827, "iterations_since_restore": 198, "episodes_total": 4752, "timestamp": 1756443451, "episode_reward_mean": -53.089172422911425, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_06-57-31", "policy_reward_mean": {}, "time_this_iter_s": 230.9632911682129, "episodes_this_iter": 24, "training_iteration": 198, "time_total_s": 49033.07736849785, "info": {"num_steps_sampled": 237600, "num_steps_trained": 237600, "default": {"policy_loss": -0.1262063831090927, "vf_explained_var": 0.9186666011810303, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 11.172323226928711, "entropy": 13.764321327209473, "kl": 0.014617557637393475, "total_loss": 11.060917854309082}, "sample_time_ms": 244599.413, "grad_time_ms": 698.993, "load_time_ms": 1.506, "update_time_ms": 2.574}, "timesteps_total": 237600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 237600, "hostname": "cda-server-3", "episode_reward_max": -49.31600089328854}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 49260.586948394775, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -62.32068669276827, "iterations_since_restore": 199, "episodes_total": 4776, "timestamp": 1756443678, "episode_reward_mean": -52.896120268548586, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_07-01-18", "policy_reward_mean": {}, "time_this_iter_s": 227.50957989692688, "episodes_this_iter": 24, "training_iteration": 199, "time_total_s": 49260.586948394775, "info": {"num_steps_sampled": 238800, "num_steps_trained": 238800, "default": {"policy_loss": -0.131291925907135, "vf_explained_var": 0.9453469514846802, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 7.727341651916504, "entropy": 13.57127571105957, "kl": 0.016682572662830353, "total_loss": 7.612940311431885}, "sample_time_ms": 245384.726, "grad_time_ms": 697.371, "load_time_ms": 1.513, "update_time_ms": 2.578}, "timesteps_total": 238800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 238800, "hostname": "cda-server-3", "episode_reward_max": -49.2260156024492}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 49541.825184345245, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -63.96882214668029, "iterations_since_restore": 200, "episodes_total": 4800, "timestamp": 1756443960, "episode_reward_mean": -53.008628398442994, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_07-06-00", "policy_reward_mean": {}, "time_this_iter_s": 281.23823595046997, "episodes_this_iter": 24, "training_iteration": 200, "time_total_s": 49541.825184345245, "info": {"num_steps_sampled": 240000, "num_steps_trained": 240000, "default": {"policy_loss": -0.12226442247629166, "vf_explained_var": 0.9560834169387817, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 6.315108776092529, "entropy": 13.709651947021484, "kl": 0.014527440071105957, "total_loss": 6.207553386688232}, "sample_time_ms": 249413.369, "grad_time_ms": 696.563, "load_time_ms": 1.415, "update_time_ms": 2.597}, "timesteps_total": 240000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 240000, "hostname": "cda-server-3", "episode_reward_max": -49.2260156024492}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 49783.74181032181, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -63.96882214668029, "iterations_since_restore": 201, "episodes_total": 4824, "timestamp": 1756444202, "episode_reward_mean": -52.91975331889113, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_07-10-02", "policy_reward_mean": {}, "time_this_iter_s": 241.9166259765625, "episodes_this_iter": 24, "training_iteration": 201, "time_total_s": 49783.74181032181, "info": {"num_steps_sampled": 241200, "num_steps_trained": 241200, "default": {"policy_loss": -0.13792450726032257, "vf_explained_var": 0.9654067158699036, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 4.6045002937316895, "entropy": 13.688363075256348, "kl": 0.015577022917568684, "total_loss": 4.4823479652404785}, "sample_time_ms": 249655.821, "grad_time_ms": 696.887, "load_time_ms": 1.501, "update_time_ms": 2.586}, "timesteps_total": 241200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 241200, "hostname": "cda-server-3", "episode_reward_max": -49.2260156024492}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 50030.31158399582, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -105.25511476379766, "iterations_since_restore": 202, "episodes_total": 4848, "timestamp": 1756444448, "episode_reward_mean": -53.19758526213469, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_07-14-08", "policy_reward_mean": {}, "time_this_iter_s": 246.56977367401123, "episodes_this_iter": 24, "training_iteration": 202, "time_total_s": 50030.31158399582, "info": {"num_steps_sampled": 242400, "num_steps_trained": 242400, "default": {"policy_loss": -0.0947578102350235, "vf_explained_var": 0.8161755204200745, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 43.95417785644531, "entropy": 13.589754104614258, "kl": 0.009557071141898632, "total_loss": 43.86909866333008}, "sample_time_ms": 245114.838, "grad_time_ms": 696.965, "load_time_ms": 1.498, "update_time_ms": 2.586}, "timesteps_total": 242400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 242400, "hostname": "cda-server-3", "episode_reward_max": -48.98603498873693}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 50312.89493370056, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -105.25511476379766, "iterations_since_restore": 203, "episodes_total": 4872, "timestamp": 1756444731, "episode_reward_mean": -53.27160994877568, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_07-18-51", "policy_reward_mean": {}, "time_this_iter_s": 282.58334970474243, "episodes_this_iter": 24, "training_iteration": 203, "time_total_s": 50312.89493370056, "info": {"num_steps_sampled": 243600, "num_steps_trained": 243600, "default": {"policy_loss": -0.12207228690385818, "vf_explained_var": 0.9586093425750732, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 5.939465045928955, "entropy": 13.629680633544922, "kl": 0.016084210947155952, "total_loss": 5.833678245544434}, "sample_time_ms": 249342.276, "grad_time_ms": 696.344, "load_time_ms": 1.497, "update_time_ms": 2.542}, "timesteps_total": 243600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 243600, "hostname": "cda-server-3", "episode_reward_max": -48.98603498873693}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 50540.21925139427, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -105.25511476379766, "iterations_since_restore": 204, "episodes_total": 4896, "timestamp": 1756444958, "episode_reward_mean": -53.370897240358936, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_07-22-38", "policy_reward_mean": {}, "time_this_iter_s": 227.32431769371033, "episodes_this_iter": 24, "training_iteration": 204, "time_total_s": 50540.21925139427, "info": {"num_steps_sampled": 244800, "num_steps_trained": 244800, "default": {"policy_loss": -0.134691059589386, "vf_explained_var": 0.9489078521728516, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 6.949552536010742, "entropy": 13.448970794677734, "kl": 0.017023924738168716, "total_loss": 6.832098007202148}, "sample_time_ms": 243720.45, "grad_time_ms": 696.352, "load_time_ms": 1.522, "update_time_ms": 2.551}, "timesteps_total": 244800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 244800, "hostname": "cda-server-3", "episode_reward_max": -48.98603498873693}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 50779.589007377625, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -105.25511476379766, "iterations_since_restore": 205, "episodes_total": 4920, "timestamp": 1756445197, "episode_reward_mean": -53.458245488786794, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_07-26-37", "policy_reward_mean": {}, "time_this_iter_s": 239.36975598335266, "episodes_this_iter": 24, "training_iteration": 205, "time_total_s": 50779.589007377625, "info": {"num_steps_sampled": 246000, "num_steps_trained": 246000, "default": {"policy_loss": -0.12922601401805878, "vf_explained_var": 0.9597580432891846, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 5.400381088256836, "entropy": 13.406126022338867, "kl": 0.017056623473763466, "total_loss": 5.288424968719482}, "sample_time_ms": 244481.14, "grad_time_ms": 695.513, "load_time_ms": 1.551, "update_time_ms": 2.528}, "timesteps_total": 246000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 246000, "hostname": "cda-server-3", "episode_reward_max": -48.98603498873693}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 51067.62697553635, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -58.935624792842, "iterations_since_restore": 206, "episodes_total": 4944, "timestamp": 1756445486, "episode_reward_mean": -53.01138822250478, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_07-31-26", "policy_reward_mean": {}, "time_this_iter_s": 288.0379681587219, "episodes_this_iter": 24, "training_iteration": 206, "time_total_s": 51067.62697553635, "info": {"num_steps_sampled": 247200, "num_steps_trained": 247200, "default": {"policy_loss": -0.1171593964099884, "vf_explained_var": 0.9476562142372131, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 6.902299880981445, "entropy": 13.519577026367188, "kl": 0.01573404110968113, "total_loss": 6.801071643829346}, "sample_time_ms": 251066.806, "grad_time_ms": 694.24, "load_time_ms": 1.59, "update_time_ms": 2.545}, "timesteps_total": 247200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 247200, "hostname": "cda-server-3", "episode_reward_max": -51.01486236176433}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 51325.64721798897, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -59.954047230685426, "iterations_since_restore": 207, "episodes_total": 4968, "timestamp": 1756445744, "episode_reward_mean": -53.17399045538728, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_07-35-44", "policy_reward_mean": {}, "time_this_iter_s": 258.02024245262146, "episodes_this_iter": 24, "training_iteration": 207, "time_total_s": 51325.64721798897, "info": {"num_steps_sampled": 248400, "num_steps_trained": 248400, "default": {"policy_loss": -0.12967216968536377, "vf_explained_var": 0.9467138648033142, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 7.576404094696045, "entropy": 13.4369478225708, "kl": 0.01681762933731079, "total_loss": 7.4637603759765625}, "sample_time_ms": 251649.088, "grad_time_ms": 694.83, "load_time_ms": 1.601, "update_time_ms": 2.567}, "timesteps_total": 248400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 248400, "hostname": "cda-server-3", "episode_reward_max": -51.01486236176433}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 51613.95212769508, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -61.44569830893842, "iterations_since_restore": 208, "episodes_total": 4992, "timestamp": 1756446032, "episode_reward_mean": -53.11367069586581, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_07-40-32", "policy_reward_mean": {}, "time_this_iter_s": 288.3049097061157, "episodes_this_iter": 24, "training_iteration": 208, "time_total_s": 51613.95212769508, "info": {"num_steps_sampled": 249600, "num_steps_trained": 249600, "default": {"policy_loss": -0.12367913126945496, "vf_explained_var": 0.9593546986579895, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 5.667877674102783, "entropy": 13.433245658874512, "kl": 0.015404744073748589, "total_loss": 5.55979585647583}, "sample_time_ms": 257382.956, "grad_time_ms": 695.117, "load_time_ms": 1.613, "update_time_ms": 2.55}, "timesteps_total": 249600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 249600, "hostname": "cda-server-3", "episode_reward_max": -50.029668242570246}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 51856.40980172157, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -71.96171297636684, "iterations_since_restore": 209, "episodes_total": 5016, "timestamp": 1756446274, "episode_reward_mean": -53.29923892735492, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_07-44-34", "policy_reward_mean": {}, "time_this_iter_s": 242.45767402648926, "episodes_this_iter": 24, "training_iteration": 209, "time_total_s": 51856.40980172157, "info": {"num_steps_sampled": 250800, "num_steps_trained": 250800, "default": {"policy_loss": -0.13099414110183716, "vf_explained_var": 0.9026677012443542, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 15.419004440307617, "entropy": 13.460000991821289, "kl": 0.015934377908706665, "total_loss": 15.304142951965332}, "sample_time_ms": 258877.945, "grad_time_ms": 695.007, "load_time_ms": 1.564, "update_time_ms": 2.538}, "timesteps_total": 250800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 250800, "hostname": "cda-server-3", "episode_reward_max": -48.49890370956543}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 52074.34491252899, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -76.06146106644461, "iterations_since_restore": 210, "episodes_total": 5040, "timestamp": 1756446492, "episode_reward_mean": -53.62399428423448, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_07-48-12", "policy_reward_mean": {}, "time_this_iter_s": 217.93511080741882, "episodes_this_iter": 24, "training_iteration": 210, "time_total_s": 52074.34491252899, "info": {"num_steps_sampled": 252000, "num_steps_trained": 252000, "default": {"policy_loss": -0.12491732090711594, "vf_explained_var": 0.8966451287269592, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 20.819692611694336, "entropy": 13.459056854248047, "kl": 0.014209000393748283, "total_loss": 20.70915985107422}, "sample_time_ms": 252547.281, "grad_time_ms": 695.311, "load_time_ms": 1.597, "update_time_ms": 2.512}, "timesteps_total": 252000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 252000, "hostname": "cda-server-3", "episode_reward_max": -48.49890370956543}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 52310.4198474884, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -88.33545886911972, "iterations_since_restore": 211, "episodes_total": 5064, "timestamp": 1756446728, "episode_reward_mean": -53.6863478223295, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_07-52-08", "policy_reward_mean": {}, "time_this_iter_s": 236.07493495941162, "episodes_this_iter": 24, "training_iteration": 211, "time_total_s": 52310.4198474884, "info": {"num_steps_sampled": 253200, "num_steps_trained": 253200, "default": {"policy_loss": -0.1190461590886116, "vf_explained_var": 0.9395532608032227, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 11.596328735351562, "entropy": 13.170246124267578, "kl": 0.013326210901141167, "total_loss": 11.490775108337402}, "sample_time_ms": 251962.736, "grad_time_ms": 695.664, "load_time_ms": 1.599, "update_time_ms": 2.514}, "timesteps_total": 253200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 253200, "hostname": "cda-server-3", "episode_reward_max": -48.40327379293791}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 52538.71108055115, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -88.33545886911972, "iterations_since_restore": 212, "episodes_total": 5088, "timestamp": 1756446957, "episode_reward_mean": -53.62617516991392, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_07-55-57", "policy_reward_mean": {}, "time_this_iter_s": 228.29123306274414, "episodes_this_iter": 24, "training_iteration": 212, "time_total_s": 52538.71108055115, "info": {"num_steps_sampled": 254400, "num_steps_trained": 254400, "default": {"policy_loss": -0.1376982182264328, "vf_explained_var": 0.9368199110031128, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 9.25528621673584, "entropy": 13.441559791564941, "kl": 0.01579122245311737, "total_loss": 9.133577346801758}, "sample_time_ms": 250135.485, "grad_time_ms": 695.088, "load_time_ms": 1.609, "update_time_ms": 2.514}, "timesteps_total": 254400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 254400, "hostname": "cda-server-3", "episode_reward_max": -48.388893830147204}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 52778.71068429947, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -88.33545886911972, "iterations_since_restore": 213, "episodes_total": 5112, "timestamp": 1756447197, "episode_reward_mean": -54.284821358814376, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_07-59-57", "policy_reward_mean": {}, "time_this_iter_s": 239.99960374832153, "episodes_this_iter": 24, "training_iteration": 213, "time_total_s": 52778.71068429947, "info": {"num_steps_sampled": 255600, "num_steps_trained": 255600, "default": {"policy_loss": -0.13345371186733246, "vf_explained_var": 0.9282054901123047, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 12.236493110656738, "entropy": 13.436868667602539, "kl": 0.01283181644976139, "total_loss": 12.116031646728516}, "sample_time_ms": 245877.083, "grad_time_ms": 695.115, "load_time_ms": 1.61, "update_time_ms": 2.515}, "timesteps_total": 255600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 255600, "hostname": "cda-server-3", "episode_reward_max": -48.388893830147204}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 53024.146672964096, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -88.33545886911972, "iterations_since_restore": 214, "episodes_total": 5136, "timestamp": 1756447442, "episode_reward_mean": -53.82445902246092, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_08-04-02", "policy_reward_mean": {}, "time_this_iter_s": 245.43598866462708, "episodes_this_iter": 24, "training_iteration": 214, "time_total_s": 53024.146672964096, "info": {"num_steps_sampled": 256800, "num_steps_trained": 256800, "default": {"policy_loss": -0.12373081594705582, "vf_explained_var": 0.9054085612297058, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 16.972644805908203, "entropy": 13.323928833007812, "kl": 0.013606571592390537, "total_loss": 16.86269187927246}, "sample_time_ms": 247686.897, "grad_time_ms": 696.479, "load_time_ms": 1.586, "update_time_ms": 2.517}, "timesteps_total": 256800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 256800, "hostname": "cda-server-3", "episode_reward_max": -48.388893830147204}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 53255.89246845245, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -81.13652323493616, "iterations_since_restore": 215, "episodes_total": 5160, "timestamp": 1756447674, "episode_reward_mean": -53.73372533272343, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_08-07-54", "policy_reward_mean": {}, "time_this_iter_s": 231.74579548835754, "episodes_this_iter": 24, "training_iteration": 215, "time_total_s": 53255.89246845245, "info": {"num_steps_sampled": 258000, "num_steps_trained": 258000, "default": {"policy_loss": -0.11627980321645737, "vf_explained_var": 0.9099230170249939, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 14.198071479797363, "entropy": 13.262800216674805, "kl": 0.01339254342019558, "total_loss": 14.095352172851562}, "sample_time_ms": 246925.159, "grad_time_ms": 695.831, "load_time_ms": 1.562, "update_time_ms": 2.554}, "timesteps_total": 258000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 258000, "hostname": "cda-server-3", "episode_reward_max": -48.388893830147204}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 53515.743619441986, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -92.46390703641067, "iterations_since_restore": 216, "episodes_total": 5184, "timestamp": 1756447934, "episode_reward_mean": -53.932576924530615, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_08-12-14", "policy_reward_mean": {}, "time_this_iter_s": 259.85115098953247, "episodes_this_iter": 24, "training_iteration": 216, "time_total_s": 53515.743619441986, "info": {"num_steps_sampled": 259200, "num_steps_trained": 259200, "default": {"policy_loss": -0.12003253400325775, "vf_explained_var": 0.9072751998901367, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 15.475444793701172, "entropy": 13.39101505279541, "kl": 0.013980243355035782, "total_loss": 15.36956787109375}, "sample_time_ms": 244104.479, "grad_time_ms": 697.843, "load_time_ms": 1.577, "update_time_ms": 2.535}, "timesteps_total": 259200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 259200, "hostname": "cda-server-3", "episode_reward_max": -47.03767859697603}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 53759.407838344574, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -92.46390703641067, "iterations_since_restore": 217, "episodes_total": 5208, "timestamp": 1756448177, "episode_reward_mean": -53.47457909992057, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_08-16-17", "policy_reward_mean": {}, "time_this_iter_s": 243.6642189025879, "episodes_this_iter": 24, "training_iteration": 217, "time_total_s": 53759.407838344574, "info": {"num_steps_sampled": 260400, "num_steps_trained": 260400, "default": {"policy_loss": -0.13741131126880646, "vf_explained_var": 0.9654526114463806, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 4.5877685546875, "entropy": 13.240228652954102, "kl": 0.01664682850241661, "total_loss": 4.467211723327637}, "sample_time_ms": 242669.242, "grad_time_ms": 697.574, "load_time_ms": 1.573, "update_time_ms": 2.499}, "timesteps_total": 260400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 260400, "hostname": "cda-server-3", "episode_reward_max": -47.03767859697603}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 53989.52684402466, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -92.46390703641067, "iterations_since_restore": 218, "episodes_total": 5232, "timestamp": 1756448408, "episode_reward_mean": -53.09185593325324, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_08-20-08", "policy_reward_mean": {}, "time_this_iter_s": 230.11900568008423, "episodes_this_iter": 24, "training_iteration": 218, "time_total_s": 53989.52684402466, "info": {"num_steps_sampled": 261600, "num_steps_trained": 261600, "default": {"policy_loss": -0.1212388426065445, "vf_explained_var": 0.9537698030471802, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 6.838181495666504, "entropy": 13.423412322998047, "kl": 0.017338156700134277, "total_loss": 6.7344970703125}, "sample_time_ms": 236850.174, "grad_time_ms": 698.088, "load_time_ms": 1.564, "update_time_ms": 2.498}, "timesteps_total": 261600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 261600, "hostname": "cda-server-3", "episode_reward_max": -47.03767859697603}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 54236.05536913872, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -92.46390703641067, "iterations_since_restore": 219, "episodes_total": 5256, "timestamp": 1756448654, "episode_reward_mean": -53.0488133532636, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_08-24-14", "policy_reward_mean": {}, "time_this_iter_s": 246.52852511405945, "episodes_this_iter": 24, "training_iteration": 219, "time_total_s": 54236.05536913872, "info": {"num_steps_sampled": 262800, "num_steps_trained": 262800, "default": {"policy_loss": -0.12424381822347641, "vf_explained_var": 0.9722467064857483, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 3.777590751647949, "entropy": 13.256404876708984, "kl": 0.016340035945177078, "total_loss": 3.669891357421875}, "sample_time_ms": 237256.053, "grad_time_ms": 699.231, "load_time_ms": 1.612, "update_time_ms": 2.504}, "timesteps_total": 262800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 262800, "hostname": "cda-server-3", "episode_reward_max": -47.03767859697603}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 54476.720437288284, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -59.73112114747605, "iterations_since_restore": 220, "episodes_total": 5280, "timestamp": 1756448895, "episode_reward_mean": -52.58953990415711, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_08-28-15", "policy_reward_mean": {}, "time_this_iter_s": 240.66506814956665, "episodes_this_iter": 24, "training_iteration": 220, "time_total_s": 54476.720437288284, "info": {"num_steps_sampled": 264000, "num_steps_trained": 264000, "default": {"policy_loss": -0.1257346272468567, "vf_explained_var": 0.9700483679771423, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 3.909719705581665, "entropy": 13.458242416381836, "kl": 0.01716863550245762, "total_loss": 3.801368236541748}, "sample_time_ms": 239528.368, "grad_time_ms": 699.809, "load_time_ms": 1.663, "update_time_ms": 2.522}, "timesteps_total": 264000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 264000, "hostname": "cda-server-3", "episode_reward_max": -49.35778091512252}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 54716.46133208275, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -59.73112114747605, "iterations_since_restore": 221, "episodes_total": 5304, "timestamp": 1756449135, "episode_reward_mean": -52.64133109806006, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_08-32-15", "policy_reward_mean": {}, "time_this_iter_s": 239.7408947944641, "episodes_this_iter": 24, "training_iteration": 221, "time_total_s": 54716.46133208275, "info": {"num_steps_sampled": 265200, "num_steps_trained": 265200, "default": {"policy_loss": -0.12260796129703522, "vf_explained_var": 0.9685428142547607, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 4.314360618591309, "entropy": 13.286518096923828, "kl": 0.015742920339107513, "total_loss": 4.207692623138428}, "sample_time_ms": 239897.071, "grad_time_ms": 697.716, "load_time_ms": 1.677, "update_time_ms": 2.503}, "timesteps_total": 265200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 265200, "hostname": "cda-server-3", "episode_reward_max": -49.35778091512252}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 54962.24299144745, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -59.73112114747605, "iterations_since_restore": 222, "episodes_total": 5328, "timestamp": 1756449380, "episode_reward_mean": -52.525968282336315, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_08-36-20", "policy_reward_mean": {}, "time_this_iter_s": 245.78165936470032, "episodes_this_iter": 24, "training_iteration": 222, "time_total_s": 54962.24299144745, "info": {"num_steps_sampled": 266400, "num_steps_trained": 266400, "default": {"policy_loss": -0.12170767784118652, "vf_explained_var": 0.9610524773597717, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 5.0196027755737305, "entropy": 13.212718963623047, "kl": 0.01548507995903492, "total_loss": 4.913573741912842}, "sample_time_ms": 241645.905, "grad_time_ms": 697.901, "load_time_ms": 1.665, "update_time_ms": 2.55}, "timesteps_total": 266400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 266400, "hostname": "cda-server-3", "episode_reward_max": -49.00649469013475}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 55248.51720046997, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -83.55056700243956, "iterations_since_restore": 223, "episodes_total": 5352, "timestamp": 1756449667, "episode_reward_mean": -52.698129910872005, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_08-41-07", "policy_reward_mean": {}, "time_this_iter_s": 286.274209022522, "episodes_this_iter": 24, "training_iteration": 223, "time_total_s": 55248.51720046997, "info": {"num_steps_sampled": 267600, "num_steps_trained": 267600, "default": {"policy_loss": -0.12211109697818756, "vf_explained_var": 0.9223343729972839, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 13.342047691345215, "entropy": 13.361546516418457, "kl": 0.012498829513788223, "total_loss": 13.23259162902832}, "sample_time_ms": 246272.877, "grad_time_ms": 698.403, "load_time_ms": 1.68, "update_time_ms": 2.542}, "timesteps_total": 267600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 267600, "hostname": "cda-server-3", "episode_reward_max": -49.00649469013475}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 55457.6604681015, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -83.55056700243956, "iterations_since_restore": 224, "episodes_total": 5376, "timestamp": 1756449876, "episode_reward_mean": -52.71015166295291, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_08-44-36", "policy_reward_mean": {}, "time_this_iter_s": 209.14326763153076, "episodes_this_iter": 24, "training_iteration": 224, "time_total_s": 55457.6604681015, "info": {"num_steps_sampled": 268800, "num_steps_trained": 268800, "default": {"policy_loss": -0.13790854811668396, "vf_explained_var": 0.9650555849075317, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 4.732025623321533, "entropy": 13.232372283935547, "kl": 0.01659400947391987, "total_loss": 4.610918045043945}, "sample_time_ms": 242643.923, "grad_time_ms": 698.085, "load_time_ms": 1.7, "update_time_ms": 2.541}, "timesteps_total": 268800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 268800, "hostname": "cda-server-3", "episode_reward_max": -49.00649469013475}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 55714.48773908615, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -83.55056700243956, "iterations_since_restore": 225, "episodes_total": 5400, "timestamp": 1756450133, "episode_reward_mean": -52.85868581510861, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_08-48-53", "policy_reward_mean": {}, "time_this_iter_s": 256.82727098464966, "episodes_this_iter": 24, "training_iteration": 225, "time_total_s": 55714.48773908615, "info": {"num_steps_sampled": 270000, "num_steps_trained": 270000, "default": {"policy_loss": -0.13200251758098602, "vf_explained_var": 0.9513610005378723, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 6.916146278381348, "entropy": 13.240900993347168, "kl": 0.016578860580921173, "total_loss": 6.800930023193359}, "sample_time_ms": 245150.936, "grad_time_ms": 699.226, "load_time_ms": 1.708, "update_time_ms": 2.534}, "timesteps_total": 270000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 270000, "hostname": "cda-server-3", "episode_reward_max": -49.00649469013475}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 55974.45828509331, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -83.55056700243956, "iterations_since_restore": 226, "episodes_total": 5424, "timestamp": 1756450393, "episode_reward_mean": -52.82738876249813, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_08-53-13", "policy_reward_mean": {}, "time_this_iter_s": 259.9705460071564, "episodes_this_iter": 24, "training_iteration": 226, "time_total_s": 55974.45828509331, "info": {"num_steps_sampled": 271200, "num_steps_trained": 271200, "default": {"policy_loss": -0.12473750114440918, "vf_explained_var": 0.9689039587974548, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 4.646268844604492, "entropy": 13.287884712219238, "kl": 0.01698196679353714, "total_loss": 4.538724899291992}, "sample_time_ms": 245165.588, "grad_time_ms": 696.612, "load_time_ms": 1.62, "update_time_ms": 2.564}, "timesteps_total": 271200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 271200, "hostname": "cda-server-3", "episode_reward_max": -49.016792454608456}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 56223.66062140465, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -79.4545443855248, "iterations_since_restore": 227, "episodes_total": 5448, "timestamp": 1756450642, "episode_reward_mean": -52.9500375272901, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_08-57-22", "policy_reward_mean": {}, "time_this_iter_s": 249.20233631134033, "episodes_this_iter": 24, "training_iteration": 227, "time_total_s": 56223.66062140465, "info": {"num_steps_sampled": 272400, "num_steps_trained": 272400, "default": {"policy_loss": -0.10819558054208755, "vf_explained_var": 0.8816754817962646, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 23.217864990234375, "entropy": 13.26015853881836, "kl": 0.012501864694058895, "total_loss": 23.12232780456543}, "sample_time_ms": 245719.133, "grad_time_ms": 696.818, "load_time_ms": 1.61, "update_time_ms": 2.574}, "timesteps_total": 272400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 272400, "hostname": "cda-server-3", "episode_reward_max": -49.268852078605434}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 56507.24248743057, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -79.4545443855248, "iterations_since_restore": 228, "episodes_total": 5472, "timestamp": 1756450925, "episode_reward_mean": -52.91810579853349, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_09-02-05", "policy_reward_mean": {}, "time_this_iter_s": 283.5818660259247, "episodes_this_iter": 24, "training_iteration": 228, "time_total_s": 56507.24248743057, "info": {"num_steps_sampled": 273600, "num_steps_trained": 273600, "default": {"policy_loss": -0.10992512106895447, "vf_explained_var": 0.944269597530365, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 7.72075080871582, "entropy": 13.22465705871582, "kl": 0.016507161781191826, "total_loss": 7.627538204193115}, "sample_time_ms": 251065.61, "grad_time_ms": 696.614, "load_time_ms": 1.599, "update_time_ms": 2.572}, "timesteps_total": 273600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 273600, "hostname": "cda-server-3", "episode_reward_max": -49.98911850932992}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 56702.564005851746, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -79.4545443855248, "iterations_since_restore": 229, "episodes_total": 5496, "timestamp": 1756451121, "episode_reward_mean": -52.85347165246375, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_09-05-21", "policy_reward_mean": {}, "time_this_iter_s": 195.3215184211731, "episodes_this_iter": 24, "training_iteration": 229, "time_total_s": 56702.564005851746, "info": {"num_steps_sampled": 274800, "num_steps_trained": 274800, "default": {"policy_loss": -0.1320653110742569, "vf_explained_var": 0.9556113481521606, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 6.415472984313965, "entropy": 13.11414909362793, "kl": 0.016584740951657295, "total_loss": 6.300199508666992}, "sample_time_ms": 245944.854, "grad_time_ms": 696.672, "load_time_ms": 1.598, "update_time_ms": 2.527}, "timesteps_total": 274800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 274800, "hostname": "cda-server-3", "episode_reward_max": -49.98911850932992}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 56951.91757917404, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -79.4545443855248, "iterations_since_restore": 230, "episodes_total": 5520, "timestamp": 1756451370, "episode_reward_mean": -52.92350903145639, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_09-09-30", "policy_reward_mean": {}, "time_this_iter_s": 249.35357332229614, "episodes_this_iter": 24, "training_iteration": 230, "time_total_s": 56951.91757917404, "info": {"num_steps_sampled": 276000, "num_steps_trained": 276000, "default": {"policy_loss": -0.11662941426038742, "vf_explained_var": 0.9661198854446411, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 4.510845184326172, "entropy": 13.191387176513672, "kl": 0.01542899664491415, "total_loss": 4.40983772277832}, "sample_time_ms": 246813.773, "grad_time_ms": 696.606, "load_time_ms": 1.602, "update_time_ms": 2.518}, "timesteps_total": 276000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 276000, "hostname": "cda-server-3", "episode_reward_max": -50.02913413293667}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 57180.85185909271, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -66.7089208892692, "iterations_since_restore": 231, "episodes_total": 5544, "timestamp": 1756451599, "episode_reward_mean": -52.592358692493825, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_09-13-19", "policy_reward_mean": {}, "time_this_iter_s": 228.93427991867065, "episodes_this_iter": 24, "training_iteration": 231, "time_total_s": 57180.85185909271, "info": {"num_steps_sampled": 277200, "num_steps_trained": 277200, "default": {"policy_loss": -0.13687659800052643, "vf_explained_var": 0.9492168426513672, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 6.889738082885742, "entropy": 12.978316307067871, "kl": 0.01719477027654648, "total_loss": 6.770271301269531}, "sample_time_ms": 245731.909, "grad_time_ms": 697.838, "load_time_ms": 1.582, "update_time_ms": 2.513}, "timesteps_total": 277200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 277200, "hostname": "cda-server-3", "episode_reward_max": -49.17351010815454}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 57436.04451966286, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -66.7089208892692, "iterations_since_restore": 232, "episodes_total": 5568, "timestamp": 1756451854, "episode_reward_mean": -52.62568365697358, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_09-17-34", "policy_reward_mean": {}, "time_this_iter_s": 255.19266057014465, "episodes_this_iter": 24, "training_iteration": 232, "time_total_s": 57436.04451966286, "info": {"num_steps_sampled": 278400, "num_steps_trained": 278400, "default": {"policy_loss": -0.14635403454303741, "vf_explained_var": 0.9654095768928528, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 4.514674186706543, "entropy": 13.11203384399414, "kl": 0.016351299360394478, "total_loss": 4.384875774383545}, "sample_time_ms": 246673.487, "grad_time_ms": 697.352, "load_time_ms": 1.581, "update_time_ms": 2.472}, "timesteps_total": 278400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 278400, "hostname": "cda-server-3", "episode_reward_max": -49.17351010815454}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 57669.30855512619, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -66.7089208892692, "iterations_since_restore": 233, "episodes_total": 5592, "timestamp": 1756452087, "episode_reward_mean": -52.547124175309, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_09-21-27", "policy_reward_mean": {}, "time_this_iter_s": 233.26403546333313, "episodes_this_iter": 24, "training_iteration": 233, "time_total_s": 57669.30855512619, "info": {"num_steps_sampled": 279600, "num_steps_trained": 279600, "default": {"policy_loss": -0.14203177392482758, "vf_explained_var": 0.965411901473999, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 4.680802822113037, "entropy": 13.123396873474121, "kl": 0.015472842380404472, "total_loss": 4.554436683654785}, "sample_time_ms": 241372.063, "grad_time_ms": 697.741, "load_time_ms": 1.555, "update_time_ms": 2.509}, "timesteps_total": 279600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 279600, "hostname": "cda-server-3", "episode_reward_max": -49.17351010815454}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 57916.46813702583, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -66.7089208892692, "iterations_since_restore": 234, "episodes_total": 5616, "timestamp": 1756452335, "episode_reward_mean": -52.47950947759737, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_09-25-35", "policy_reward_mean": {}, "time_this_iter_s": 247.15958189964294, "episodes_this_iter": 24, "training_iteration": 234, "time_total_s": 57916.46813702583, "info": {"num_steps_sampled": 280800, "num_steps_trained": 280800, "default": {"policy_loss": -0.1272757351398468, "vf_explained_var": 0.9736604690551758, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 3.4182353019714355, "entropy": 13.120083808898926, "kl": 0.01562454178929329, "total_loss": 3.306779384613037}, "sample_time_ms": 245174.253, "grad_time_ms": 697.181, "load_time_ms": 1.549, "update_time_ms": 2.539}, "timesteps_total": 280800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 280800, "hostname": "cda-server-3", "episode_reward_max": -49.17351010815454}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 58124.95299601555, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -66.7089208892692, "iterations_since_restore": 235, "episodes_total": 5640, "timestamp": 1756452543, "episode_reward_mean": -52.54500402832971, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_09-29-03", "policy_reward_mean": {}, "time_this_iter_s": 208.48485898971558, "episodes_this_iter": 24, "training_iteration": 235, "time_total_s": 58124.95299601555, "info": {"num_steps_sampled": 282000, "num_steps_trained": 282000, "default": {"policy_loss": -0.11984744668006897, "vf_explained_var": 0.9667076468467712, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 4.498193740844727, "entropy": 12.998368263244629, "kl": 0.014812729321420193, "total_loss": 4.393343925476074}, "sample_time_ms": 240340.756, "grad_time_ms": 696.409, "load_time_ms": 1.566, "update_time_ms": 2.515}, "timesteps_total": 282000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 282000, "hostname": "cda-server-3", "episode_reward_max": -49.879847194777106}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 58354.85333657265, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -63.120537966067694, "iterations_since_restore": 236, "episodes_total": 5664, "timestamp": 1756452773, "episode_reward_mean": -52.38867305401343, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_09-32-53", "policy_reward_mean": {}, "time_this_iter_s": 229.9003405570984, "episodes_this_iter": 24, "training_iteration": 236, "time_total_s": 58354.85333657265, "info": {"num_steps_sampled": 283200, "num_steps_trained": 283200, "default": {"policy_loss": -0.12795832753181458, "vf_explained_var": 0.9684709906578064, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 4.145485877990723, "entropy": 12.91740894317627, "kl": 0.01682090386748314, "total_loss": 4.0345587730407715}, "sample_time_ms": 237333.004, "grad_time_ms": 697.025, "load_time_ms": 1.646, "update_time_ms": 2.515}, "timesteps_total": 283200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 283200, "hostname": "cda-server-3", "episode_reward_max": -49.41547090352766}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 58596.74061131477, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -63.120537966067694, "iterations_since_restore": 237, "episodes_total": 5688, "timestamp": 1756453015, "episode_reward_mean": -52.378672504431236, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_09-36-55", "policy_reward_mean": {}, "time_this_iter_s": 241.88727474212646, "episodes_this_iter": 24, "training_iteration": 237, "time_total_s": 58596.74061131477, "info": {"num_steps_sampled": 284400, "num_steps_trained": 284400, "default": {"policy_loss": -0.12086444348096848, "vf_explained_var": 0.9703031182289124, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 4.048018932342529, "entropy": 13.210933685302734, "kl": 0.01684574969112873, "total_loss": 3.944211006164551}, "sample_time_ms": 236600.769, "grad_time_ms": 697.81, "load_time_ms": 1.621, "update_time_ms": 2.511}, "timesteps_total": 284400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 284400, "hostname": "cda-server-3", "episode_reward_max": -49.41547090352766}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 58796.771169900894, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -62.37009129837001, "iterations_since_restore": 238, "episodes_total": 5712, "timestamp": 1756453215, "episode_reward_mean": -52.37850576015482, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_09-40-15", "policy_reward_mean": {}, "time_this_iter_s": 200.0305585861206, "episodes_this_iter": 24, "training_iteration": 238, "time_total_s": 58796.771169900894, "info": {"num_steps_sampled": 285600, "num_steps_trained": 285600, "default": {"policy_loss": -0.1238275095820427, "vf_explained_var": 0.9677734375, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 4.313910484313965, "entropy": 12.92751693725586, "kl": 0.015617319382727146, "total_loss": 4.205895900726318}, "sample_time_ms": 228245.691, "grad_time_ms": 697.581, "load_time_ms": 1.699, "update_time_ms": 2.532}, "timesteps_total": 285600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 285600, "hostname": "cda-server-3", "episode_reward_max": -49.41547090352766}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 59026.676966905594, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -95.4942763001984, "iterations_since_restore": 239, "episodes_total": 5736, "timestamp": 1756453445, "episode_reward_mean": -52.737142631935086, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_09-44-05", "policy_reward_mean": {}, "time_this_iter_s": 229.9057970046997, "episodes_this_iter": 24, "training_iteration": 239, "time_total_s": 59026.676966905594, "info": {"num_steps_sampled": 286800, "num_steps_trained": 286800, "default": {"policy_loss": -0.13358724117279053, "vf_explained_var": 0.9385756254196167, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 12.510951042175293, "entropy": 12.898584365844727, "kl": 0.01375828217715025, "total_loss": 12.391292572021484}, "sample_time_ms": 231704.422, "grad_time_ms": 697.294, "load_time_ms": 1.688, "update_time_ms": 2.559}, "timesteps_total": 286800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 286800, "hostname": "cda-server-3", "episode_reward_max": -49.21767791815008}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 59265.3185608387, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -95.4942763001984, "iterations_since_restore": 240, "episodes_total": 5760, "timestamp": 1756453684, "episode_reward_mean": -52.989287994986306, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_09-48-04", "policy_reward_mean": {}, "time_this_iter_s": 238.64159393310547, "episodes_this_iter": 24, "training_iteration": 240, "time_total_s": 59265.3185608387, "info": {"num_steps_sampled": 288000, "num_steps_trained": 288000, "default": {"policy_loss": -0.12578149139881134, "vf_explained_var": 0.9672372341156006, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 4.648038864135742, "entropy": 12.954557418823242, "kl": 0.01652970165014267, "total_loss": 4.5389933586120605}, "sample_time_ms": 230634.254, "grad_time_ms": 696.269, "load_time_ms": 1.702, "update_time_ms": 2.559}, "timesteps_total": 288000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 288000, "hostname": "cda-server-3", "episode_reward_max": -49.21767791815008}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 59519.89746642113, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -106.44784318134155, "iterations_since_restore": 241, "episodes_total": 5784, "timestamp": 1756453938, "episode_reward_mean": -53.59184756133134, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_09-52-18", "policy_reward_mean": {}, "time_this_iter_s": 254.57890558242798, "episodes_this_iter": 24, "training_iteration": 241, "time_total_s": 59519.89746642113, "info": {"num_steps_sampled": 289200, "num_steps_trained": 289200, "default": {"policy_loss": -0.11200863867998123, "vf_explained_var": 0.9398728609085083, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 11.982730865478516, "entropy": 12.876962661743164, "kl": 0.012609120458364487, "total_loss": 11.883487701416016}, "sample_time_ms": 233197.913, "grad_time_ms": 697.002, "load_time_ms": 1.706, "update_time_ms": 2.571}, "timesteps_total": 289200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 289200, "hostname": "cda-server-3", "episode_reward_max": -49.21767791815008}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 59724.84717440605, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -106.44784318134155, "iterations_since_restore": 242, "episodes_total": 5808, "timestamp": 1756454143, "episode_reward_mean": -53.62086601566846, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_09-55-43", "policy_reward_mean": {}, "time_this_iter_s": 204.94970798492432, "episodes_this_iter": 24, "training_iteration": 242, "time_total_s": 59724.84717440605, "info": {"num_steps_sampled": 290400, "num_steps_trained": 290400, "default": {"policy_loss": -0.12599098682403564, "vf_explained_var": 0.9540507793426514, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 6.292912483215332, "entropy": 12.756505966186523, "kl": 0.015495683066546917, "total_loss": 6.182610511779785}, "sample_time_ms": 228172.987, "grad_time_ms": 697.602, "load_time_ms": 1.711, "update_time_ms": 2.593}, "timesteps_total": 290400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 290400, "hostname": "cda-server-3", "episode_reward_max": -49.21767791815008}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 59956.89122271538, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -106.44784318134155, "iterations_since_restore": 243, "episodes_total": 5832, "timestamp": 1756454375, "episode_reward_mean": -53.901326526581414, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_09-59-35", "policy_reward_mean": {}, "time_this_iter_s": 232.04404830932617, "episodes_this_iter": 24, "training_iteration": 243, "time_total_s": 59956.89122271538, "info": {"num_steps_sampled": 291600, "num_steps_trained": 291600, "default": {"policy_loss": -0.12324307858943939, "vf_explained_var": 0.9141952991485596, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 14.333452224731445, "entropy": 13.002093315124512, "kl": 0.014958103187382221, "total_loss": 14.225353240966797}, "sample_time_ms": 228052.492, "grad_time_ms": 696.181, "load_time_ms": 1.651, "update_time_ms": 2.577}, "timesteps_total": 291600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 291600, "hostname": "cda-server-3", "episode_reward_max": -49.21767791815008}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 60170.40907239914, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -106.44784318134155, "iterations_since_restore": 244, "episodes_total": 5856, "timestamp": 1756454589, "episode_reward_mean": -53.29756909889751, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_10-03-09", "policy_reward_mean": {}, "time_this_iter_s": 213.5178496837616, "episodes_this_iter": 24, "training_iteration": 244, "time_total_s": 60170.40907239914, "info": {"num_steps_sampled": 292800, "num_steps_trained": 292800, "default": {"policy_loss": -0.13375505805015564, "vf_explained_var": 0.97227942943573, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 3.7006442546844482, "entropy": 12.993228912353516, "kl": 0.016853027045726776, "total_loss": 3.5839526653289795}, "sample_time_ms": 224688.31, "grad_time_ms": 696.174, "load_time_ms": 1.645, "update_time_ms": 2.576}, "timesteps_total": 292800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 292800, "hostname": "cda-server-3", "episode_reward_max": -49.240560247852144}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 60411.574466466904, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -76.23910984773836, "iterations_since_restore": 245, "episodes_total": 5880, "timestamp": 1756454830, "episode_reward_mean": -52.81890130686282, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_10-07-10", "policy_reward_mean": {}, "time_this_iter_s": 241.16539406776428, "episodes_this_iter": 24, "training_iteration": 245, "time_total_s": 60411.574466466904, "info": {"num_steps_sampled": 294000, "num_steps_trained": 294000, "default": {"policy_loss": -0.1345943808555603, "vf_explained_var": 0.9743247628211975, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 3.4963855743408203, "entropy": 12.831571578979492, "kl": 0.016198769211769104, "total_loss": 3.378192186355591}, "sample_time_ms": 227954.699, "grad_time_ms": 697.876, "load_time_ms": 1.628, "update_time_ms": 2.576}, "timesteps_total": 294000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 294000, "hostname": "cda-server-3", "episode_reward_max": -49.240560247852144}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 60602.57510614395, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -76.23910984773836, "iterations_since_restore": 246, "episodes_total": 5904, "timestamp": 1756455021, "episode_reward_mean": -52.90135958003802, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_10-10-21", "policy_reward_mean": {}, "time_this_iter_s": 191.00063967704773, "episodes_this_iter": 24, "training_iteration": 246, "time_total_s": 60602.57510614395, "info": {"num_steps_sampled": 295200, "num_steps_trained": 295200, "default": {"policy_loss": -0.1390654593706131, "vf_explained_var": 0.9514430165290833, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 6.642275810241699, "entropy": 12.809523582458496, "kl": 0.018181614577770233, "total_loss": 6.52161979675293}, "sample_time_ms": 224064.105, "grad_time_ms": 698.505, "load_time_ms": 1.624, "update_time_ms": 2.565}, "timesteps_total": 295200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 295200, "hostname": "cda-server-3", "episode_reward_max": -49.240560247852144}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 60830.046969652176, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -65.48786138168421, "iterations_since_restore": 247, "episodes_total": 5928, "timestamp": 1756455248, "episode_reward_mean": -52.881967742395965, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_10-14-08", "policy_reward_mean": {}, "time_this_iter_s": 227.4718635082245, "episodes_this_iter": 24, "training_iteration": 247, "time_total_s": 60830.046969652176, "info": {"num_steps_sampled": 296400, "num_steps_trained": 296400, "default": {"policy_loss": -0.13701820373535156, "vf_explained_var": 0.9588128328323364, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 5.714297771453857, "entropy": 12.85362720489502, "kl": 0.017220674082636833, "total_loss": 5.594715595245361}, "sample_time_ms": 222621.545, "grad_time_ms": 699.502, "load_time_ms": 1.664, "update_time_ms": 2.558}, "timesteps_total": 296400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 296400, "hostname": "cda-server-3", "episode_reward_max": -51.05344091696414}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 61065.82716369629, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -65.48786138168421, "iterations_since_restore": 248, "episodes_total": 5952, "timestamp": 1756455484, "episode_reward_mean": -52.927681770163744, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_10-18-04", "policy_reward_mean": {}, "time_this_iter_s": 235.78019404411316, "episodes_this_iter": 24, "training_iteration": 248, "time_total_s": 61065.82716369629, "info": {"num_steps_sampled": 297600, "num_steps_trained": 297600, "default": {"policy_loss": -0.1399531066417694, "vf_explained_var": 0.9703459143638611, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 4.2092742919921875, "entropy": 12.845855712890625, "kl": 0.01608450338244438, "total_loss": 4.085606575012207}, "sample_time_ms": 226196.996, "grad_time_ms": 699.192, "load_time_ms": 1.588, "update_time_ms": 2.532}, "timesteps_total": 297600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 297600, "hostname": "cda-server-3", "episode_reward_max": -51.05344091696414}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 61246.74543738365, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -65.48786138168421, "iterations_since_restore": 249, "episodes_total": 5976, "timestamp": 1756455665, "episode_reward_mean": -52.78914995655172, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_10-21-05", "policy_reward_mean": {}, "time_this_iter_s": 180.91827368736267, "episodes_this_iter": 24, "training_iteration": 249, "time_total_s": 61246.74543738365, "info": {"num_steps_sampled": 298800, "num_steps_trained": 298800, "default": {"policy_loss": -0.11524263024330139, "vf_explained_var": 0.9645593166351318, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 4.863693714141846, "entropy": 12.834324836730957, "kl": 0.01413909625262022, "total_loss": 4.762767314910889}, "sample_time_ms": 221299.41, "grad_time_ms": 698.133, "load_time_ms": 1.508, "update_time_ms": 2.56}, "timesteps_total": 298800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 298800, "hostname": "cda-server-3", "episode_reward_max": -50.57447261648545}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 61500.9609041214, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -70.8772337757874, "iterations_since_restore": 250, "episodes_total": 6000, "timestamp": 1756455919, "episode_reward_mean": -52.827164561053394, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_10-25-19", "policy_reward_mean": {}, "time_this_iter_s": 254.2154667377472, "episodes_this_iter": 24, "training_iteration": 250, "time_total_s": 61500.9609041214, "info": {"num_steps_sampled": 300000, "num_steps_trained": 300000, "default": {"policy_loss": -0.12152360379695892, "vf_explained_var": 0.9502347111701965, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 8.935587882995605, "entropy": 12.682344436645508, "kl": 0.01290571317076683, "total_loss": 8.827131271362305}, "sample_time_ms": 222856.172, "grad_time_ms": 698.697, "load_time_ms": 1.507, "update_time_ms": 2.556}, "timesteps_total": 300000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 300000, "hostname": "cda-server-3", "episode_reward_max": -49.381404257923435}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 61727.96933889389, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -80.43938479448286, "iterations_since_restore": 251, "episodes_total": 6024, "timestamp": 1756456146, "episode_reward_mean": -52.932297495206534, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_10-29-06", "policy_reward_mean": {}, "time_this_iter_s": 227.00843477249146, "episodes_this_iter": 24, "training_iteration": 251, "time_total_s": 61727.96933889389, "info": {"num_steps_sampled": 301200, "num_steps_trained": 301200, "default": {"policy_loss": -0.124129518866539, "vf_explained_var": 0.9445521831512451, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 9.515353202819824, "entropy": 12.694993019104004, "kl": 0.013231638818979263, "total_loss": 9.404621124267578}, "sample_time_ms": 220099.801, "grad_time_ms": 698.036, "load_time_ms": 1.503, "update_time_ms": 2.569}, "timesteps_total": 301200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 301200, "hostname": "cda-server-3", "episode_reward_max": -49.381404257923435}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 61935.81016087532, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -83.02410042439696, "iterations_since_restore": 252, "episodes_total": 6048, "timestamp": 1756456354, "episode_reward_mean": -53.23362229005515, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_10-32-34", "policy_reward_mean": {}, "time_this_iter_s": 207.84082198143005, "episodes_this_iter": 24, "training_iteration": 252, "time_total_s": 61935.81016087532, "info": {"num_steps_sampled": 302400, "num_steps_trained": 302400, "default": {"policy_loss": -0.1158803403377533, "vf_explained_var": 0.8811068534851074, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 23.63001823425293, "entropy": 12.623221397399902, "kl": 0.012091527692973614, "total_loss": 23.52638053894043}, "sample_time_ms": 220389.057, "grad_time_ms": 698.046, "load_time_ms": 1.442, "update_time_ms": 2.575}, "timesteps_total": 302400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 302400, "hostname": "cda-server-3", "episode_reward_max": -48.46283934118226}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 62156.379033088684, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -83.02410042439696, "iterations_since_restore": 253, "episodes_total": 6072, "timestamp": 1756456575, "episode_reward_mean": -53.11197609884594, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_10-36-15", "policy_reward_mean": {}, "time_this_iter_s": 220.56887221336365, "episodes_this_iter": 24, "training_iteration": 253, "time_total_s": 62156.379033088684, "info": {"num_steps_sampled": 303600, "num_steps_trained": 303600, "default": {"policy_loss": -0.12129177153110504, "vf_explained_var": 0.9589307308197021, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 5.636325836181641, "entropy": 12.391603469848633, "kl": 0.016065770760178566, "total_loss": 5.531301021575928}, "sample_time_ms": 219241.656, "grad_time_ms": 697.976, "load_time_ms": 1.409, "update_time_ms": 2.556}, "timesteps_total": 303600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 303600, "hostname": "cda-server-3", "episode_reward_max": -48.46283934118226}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 62395.67424201965, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -83.02410042439696, "iterations_since_restore": 254, "episodes_total": 6096, "timestamp": 1756456814, "episode_reward_mean": -52.897401643507685, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_10-40-14", "policy_reward_mean": {}, "time_this_iter_s": 239.29520893096924, "episodes_this_iter": 24, "training_iteration": 254, "time_total_s": 62395.67424201965, "info": {"num_steps_sampled": 304800, "num_steps_trained": 304800, "default": {"policy_loss": -0.13790710270404816, "vf_explained_var": 0.9639573693275452, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 4.655649185180664, "entropy": 12.867449760437012, "kl": 0.016777753829956055, "total_loss": 4.53472900390625}, "sample_time_ms": 221819.661, "grad_time_ms": 697.707, "load_time_ms": 1.409, "update_time_ms": 2.544}, "timesteps_total": 304800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 304800, "hostname": "cda-server-3", "episode_reward_max": -46.975067536221076}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 62574.46407747269, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -83.02410042439696, "iterations_since_restore": 255, "episodes_total": 6120, "timestamp": 1756456993, "episode_reward_mean": -53.09295997154534, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_10-43-13", "policy_reward_mean": {}, "time_this_iter_s": 178.78983545303345, "episodes_this_iter": 24, "training_iteration": 255, "time_total_s": 62574.46407747269, "info": {"num_steps_sampled": 306000, "num_steps_trained": 306000, "default": {"policy_loss": -0.1259656399488449, "vf_explained_var": 0.9383307695388794, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 10.441603660583496, "entropy": 12.83752727508545, "kl": 0.012530826032161713, "total_loss": 10.328326225280762}, "sample_time_ms": 215582.737, "grad_time_ms": 697.005, "load_time_ms": 1.445, "update_time_ms": 2.567}, "timesteps_total": 306000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 306000, "hostname": "cda-server-3", "episode_reward_max": -46.975067536221076}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 62805.72783088684, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -83.02410042439696, "iterations_since_restore": 256, "episodes_total": 6144, "timestamp": 1756457224, "episode_reward_mean": -52.69068645877551, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_10-47-04", "policy_reward_mean": {}, "time_this_iter_s": 231.26375341415405, "episodes_this_iter": 24, "training_iteration": 256, "time_total_s": 62805.72783088684, "info": {"num_steps_sampled": 307200, "num_steps_trained": 307200, "default": {"policy_loss": -0.11372081190347672, "vf_explained_var": 0.9600616097450256, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 5.309256553649902, "entropy": 12.717082977294922, "kl": 0.015648726373910904, "total_loss": 5.211379528045654}, "sample_time_ms": 219609.757, "grad_time_ms": 696.257, "load_time_ms": 1.456, "update_time_ms": 2.573}, "timesteps_total": 307200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 307200, "hostname": "cda-server-3", "episode_reward_max": -46.975067536221076}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 63022.77389717102, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -76.95679255815752, "iterations_since_restore": 257, "episodes_total": 6168, "timestamp": 1756457441, "episode_reward_mean": -52.35233045584228, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_10-50-41", "policy_reward_mean": {}, "time_this_iter_s": 217.0460662841797, "episodes_this_iter": 24, "training_iteration": 257, "time_total_s": 63022.77389717102, "info": {"num_steps_sampled": 308400, "num_steps_trained": 308400, "default": {"policy_loss": -0.14049550890922546, "vf_explained_var": 0.9665980935096741, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 4.424448490142822, "entropy": 12.783870697021484, "kl": 0.015212688595056534, "total_loss": 4.299355983734131}, "sample_time_ms": 218569.116, "grad_time_ms": 694.29, "load_time_ms": 1.439, "update_time_ms": 2.611}, "timesteps_total": 308400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 308400, "hostname": "cda-server-3", "episode_reward_max": -46.975067536221076}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 63204.996910095215, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -76.95679255815752, "iterations_since_restore": 258, "episodes_total": 6192, "timestamp": 1756457623, "episode_reward_mean": -52.605614783542904, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_10-53-43", "policy_reward_mean": {}, "time_this_iter_s": 182.22301292419434, "episodes_this_iter": 24, "training_iteration": 258, "time_total_s": 63204.996910095215, "info": {"num_steps_sampled": 309600, "num_steps_trained": 309600, "default": {"policy_loss": -0.11564840376377106, "vf_explained_var": 0.9276121854782104, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 11.025399208068848, "entropy": 12.666180610656738, "kl": 0.01288242544978857, "total_loss": 10.922794342041016}, "sample_time_ms": 213212.693, "grad_time_ms": 695.016, "load_time_ms": 1.456, "update_time_ms": 2.624}, "timesteps_total": 309600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 309600, "hostname": "cda-server-3", "episode_reward_max": -48.96675049775499}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 63413.304302453995, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -79.38376949820108, "iterations_since_restore": 259, "episodes_total": 6216, "timestamp": 1756457832, "episode_reward_mean": -52.79132498828461, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_10-57-12", "policy_reward_mean": {}, "time_this_iter_s": 208.3073923587799, "episodes_this_iter": 24, "training_iteration": 259, "time_total_s": 63413.304302453995, "info": {"num_steps_sampled": 310800, "num_steps_trained": 310800, "default": {"policy_loss": -0.13469654321670532, "vf_explained_var": 0.9125310182571411, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 14.25716781616211, "entropy": 12.729401588439941, "kl": 0.014358220621943474, "total_loss": 14.13701057434082}, "sample_time_ms": 215951.335, "grad_time_ms": 695.194, "load_time_ms": 1.538, "update_time_ms": 2.573}, "timesteps_total": 310800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 310800, "hostname": "cda-server-3", "episode_reward_max": -48.96675049775499}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 63619.37710595131, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -79.38376949820108, "iterations_since_restore": 260, "episodes_total": 6240, "timestamp": 1756458038, "episode_reward_mean": -53.09396680432882, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_11-00-38", "policy_reward_mean": {}, "time_this_iter_s": 206.07280349731445, "episodes_this_iter": 24, "training_iteration": 260, "time_total_s": 63619.37710595131, "info": {"num_steps_sampled": 312000, "num_steps_trained": 312000, "default": {"policy_loss": -0.12505127489566803, "vf_explained_var": 0.9432923197746277, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 9.74711799621582, "entropy": 12.539690017700195, "kl": 0.013607031665742397, "total_loss": 9.635843276977539}, "sample_time_ms": 211137.637, "grad_time_ms": 694.838, "load_time_ms": 1.436, "update_time_ms": 2.583}, "timesteps_total": 312000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 312000, "hostname": "cda-server-3", "episode_reward_max": -48.96675049775499}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 63809.00711917877, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -79.38376949820108, "iterations_since_restore": 261, "episodes_total": 6264, "timestamp": 1756458227, "episode_reward_mean": -53.28716145224107, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_11-03-47", "policy_reward_mean": {}, "time_this_iter_s": 189.63001322746277, "episodes_this_iter": 24, "training_iteration": 261, "time_total_s": 63809.00711917877, "info": {"num_steps_sampled": 313200, "num_steps_trained": 313200, "default": {"policy_loss": -0.12509626150131226, "vf_explained_var": 0.9446278810501099, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 8.04684829711914, "entropy": 12.705997467041016, "kl": 0.014072345569729805, "total_loss": 7.936000347137451}, "sample_time_ms": 207399.587, "grad_time_ms": 695.059, "load_time_ms": 1.451, "update_time_ms": 2.572}, "timesteps_total": 313200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 313200, "hostname": "cda-server-3", "episode_reward_max": -49.004492976462004}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 64035.38051056862, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -79.38376949820108, "iterations_since_restore": 262, "episodes_total": 6288, "timestamp": 1756458454, "episode_reward_mean": -53.4176266055403, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_11-07-34", "policy_reward_mean": {}, "time_this_iter_s": 226.3733913898468, "episodes_this_iter": 24, "training_iteration": 262, "time_total_s": 64035.38051056862, "info": {"num_steps_sampled": 314400, "num_steps_trained": 314400, "default": {"policy_loss": -0.13777390122413635, "vf_explained_var": 0.9654431939125061, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 4.891932964324951, "entropy": 12.455157279968262, "kl": 0.015701068565249443, "total_loss": 4.770056247711182}, "sample_time_ms": 209252.26, "grad_time_ms": 695.58, "load_time_ms": 1.509, "update_time_ms": 2.548}, "timesteps_total": 314400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 314400, "hostname": "cda-server-3", "episode_reward_max": -49.004492976462004}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 64277.10109376907, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -83.8225622835028, "iterations_since_restore": 263, "episodes_total": 6312, "timestamp": 1756458696, "episode_reward_mean": -53.768203859822826, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_11-11-36", "policy_reward_mean": {}, "time_this_iter_s": 241.7205832004547, "episodes_this_iter": 24, "training_iteration": 263, "time_total_s": 64277.10109376907, "info": {"num_steps_sampled": 315600, "num_steps_trained": 315600, "default": {"policy_loss": -0.11208131909370422, "vf_explained_var": 0.8933252692222595, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 17.854284286499023, "entropy": 12.356292724609375, "kl": 0.01158389076590538, "total_loss": 17.753929138183594}, "sample_time_ms": 211366.547, "grad_time_ms": 696.315, "load_time_ms": 1.607, "update_time_ms": 2.583}, "timesteps_total": 315600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 315600, "hostname": "cda-server-3", "episode_reward_max": -49.782612914095786}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 64485.63278698921, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -83.8225622835028, "iterations_since_restore": 264, "episodes_total": 6336, "timestamp": 1756458904, "episode_reward_mean": -53.170355109342026, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_11-15-04", "policy_reward_mean": {}, "time_this_iter_s": 208.53169322013855, "episodes_this_iter": 24, "training_iteration": 264, "time_total_s": 64485.63278698921, "info": {"num_steps_sampled": 316800, "num_steps_trained": 316800, "default": {"policy_loss": -0.14228513836860657, "vf_explained_var": 0.9692507982254028, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 4.200347900390625, "entropy": 12.575531005859375, "kl": 0.01658741384744644, "total_loss": 4.074857711791992}, "sample_time_ms": 208290.047, "grad_time_ms": 696.472, "load_time_ms": 1.609, "update_time_ms": 2.566}, "timesteps_total": 316800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 316800, "hostname": "cda-server-3", "episode_reward_max": -50.16941653944491}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 64703.80116915703, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -83.8225622835028, "iterations_since_restore": 265, "episodes_total": 6360, "timestamp": 1756459122, "episode_reward_mean": -53.07160473149186, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_11-18-42", "policy_reward_mean": {}, "time_this_iter_s": 218.16838216781616, "episodes_this_iter": 24, "training_iteration": 265, "time_total_s": 64703.80116915703, "info": {"num_steps_sampled": 318000, "num_steps_trained": 318000, "default": {"policy_loss": -0.13435477018356323, "vf_explained_var": 0.9649702906608582, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 4.764406681060791, "entropy": 12.417465209960938, "kl": 0.015358511358499527, "total_loss": 4.645602703094482}, "sample_time_ms": 212228.495, "grad_time_ms": 695.929, "load_time_ms": 1.58, "update_time_ms": 2.549}, "timesteps_total": 318000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 318000, "hostname": "cda-server-3", "episode_reward_max": -46.99046521985731}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 64931.42123794556, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -83.8225622835028, "iterations_since_restore": 266, "episodes_total": 6384, "timestamp": 1756459350, "episode_reward_mean": -52.872884910836525, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_11-22-30", "policy_reward_mean": {}, "time_this_iter_s": 227.62006878852844, "episodes_this_iter": 24, "training_iteration": 266, "time_total_s": 64931.42123794556, "info": {"num_steps_sampled": 319200, "num_steps_trained": 319200, "default": {"policy_loss": -0.12467009574174881, "vf_explained_var": 0.97074294090271, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 3.9884033203125, "entropy": 12.425530433654785, "kl": 0.01615087501704693, "total_loss": 3.8800861835479736}, "sample_time_ms": 211864.165, "grad_time_ms": 696.02, "load_time_ms": 1.503, "update_time_ms": 2.543}, "timesteps_total": 319200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 319200, "hostname": "cda-server-3", "episode_reward_max": -46.99046521985731}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 65124.32090330124, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -63.57036311703964, "iterations_since_restore": 267, "episodes_total": 6408, "timestamp": 1756459543, "episode_reward_mean": -52.37472990453051, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_11-25-43", "policy_reward_mean": {}, "time_this_iter_s": 192.89966535568237, "episodes_this_iter": 24, "training_iteration": 267, "time_total_s": 65124.32090330124, "info": {"num_steps_sampled": 320400, "num_steps_trained": 320400, "default": {"policy_loss": -0.13256537914276123, "vf_explained_var": 0.957770586013794, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 5.696261405944824, "entropy": 12.094939231872559, "kl": 0.016220351681113243, "total_loss": 5.580119609832764}, "sample_time_ms": 209449.156, "grad_time_ms": 696.343, "load_time_ms": 1.515, "update_time_ms": 2.54}, "timesteps_total": 320400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 320400, "hostname": "cda-server-3", "episode_reward_max": -46.99046521985731}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 65352.82435941696, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -63.57036311703964, "iterations_since_restore": 268, "episodes_total": 6432, "timestamp": 1756459771, "episode_reward_mean": -52.32770520567257, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_11-29-31", "policy_reward_mean": {}, "time_this_iter_s": 228.50345611572266, "episodes_this_iter": 24, "training_iteration": 268, "time_total_s": 65352.82435941696, "info": {"num_steps_sampled": 321600, "num_steps_trained": 321600, "default": {"policy_loss": -0.13483382761478424, "vf_explained_var": 0.9603874683380127, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 5.443893909454346, "entropy": 12.340784072875977, "kl": 0.015873024240136147, "total_loss": 5.325130939483643}, "sample_time_ms": 214077.564, "grad_time_ms": 695.929, "load_time_ms": 1.495, "update_time_ms": 2.539}, "timesteps_total": 321600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 321600, "hostname": "cda-server-3", "episode_reward_max": -46.99046521985731}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 65557.31867551804, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -63.57036311703964, "iterations_since_restore": 269, "episodes_total": 6456, "timestamp": 1756459976, "episode_reward_mean": -52.411166516284226, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_11-32-56", "policy_reward_mean": {}, "time_this_iter_s": 204.49431610107422, "episodes_this_iter": 24, "training_iteration": 269, "time_total_s": 65557.31867551804, "info": {"num_steps_sampled": 322800, "num_steps_trained": 322800, "default": {"policy_loss": -0.14043231308460236, "vf_explained_var": 0.9675581455230713, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 4.18639612197876, "entropy": 12.402804374694824, "kl": 0.018088258802890778, "total_loss": 4.064278602600098}, "sample_time_ms": 213695.485, "grad_time_ms": 696.66, "load_time_ms": 1.494, "update_time_ms": 2.578}, "timesteps_total": 322800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 322800, "hostname": "cda-server-3", "episode_reward_max": -49.284212041297145}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 65765.64012217522, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -84.43411533360964, "iterations_since_restore": 270, "episodes_total": 6480, "timestamp": 1756460184, "episode_reward_mean": -52.88237131219012, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_11-36-24", "policy_reward_mean": {}, "time_this_iter_s": 208.3214466571808, "episodes_this_iter": 24, "training_iteration": 270, "time_total_s": 65765.64012217522, "info": {"num_steps_sampled": 324000, "num_steps_trained": 324000, "default": {"policy_loss": -0.1212284192442894, "vf_explained_var": 0.9362192153930664, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 11.846854209899902, "entropy": 12.34295654296875, "kl": 0.01328012440353632, "total_loss": 11.7390718460083}, "sample_time_ms": 213920.267, "grad_time_ms": 696.66, "load_time_ms": 1.539, "update_time_ms": 2.571}, "timesteps_total": 324000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 324000, "hostname": "cda-server-3", "episode_reward_max": -49.284212041297145}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 65988.13902163506, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -84.43411533360964, "iterations_since_restore": 271, "episodes_total": 6504, "timestamp": 1756460407, "episode_reward_mean": -52.73400011488515, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_11-40-07", "policy_reward_mean": {}, "time_this_iter_s": 222.49889945983887, "episodes_this_iter": 24, "training_iteration": 271, "time_total_s": 65988.13902163506, "info": {"num_steps_sampled": 325200, "num_steps_trained": 325200, "default": {"policy_loss": -0.12005000561475754, "vf_explained_var": 0.9790047407150269, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 2.753878593444824, "entropy": 12.271784782409668, "kl": 0.017051290720701218, "total_loss": 2.651093006134033}, "sample_time_ms": 217207.977, "grad_time_ms": 696.019, "load_time_ms": 1.434, "update_time_ms": 2.564}, "timesteps_total": 325200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 325200, "hostname": "cda-server-3", "episode_reward_max": -49.284212041297145}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 66196.45666050911, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -84.43411533360964, "iterations_since_restore": 272, "episodes_total": 6528, "timestamp": 1756460615, "episode_reward_mean": -53.14926512060034, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_11-43-35", "policy_reward_mean": {}, "time_this_iter_s": 208.31763887405396, "episodes_this_iter": 24, "training_iteration": 272, "time_total_s": 66196.45666050911, "info": {"num_steps_sampled": 326400, "num_steps_trained": 326400, "default": {"policy_loss": -0.12172228842973709, "vf_explained_var": 0.941199004650116, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 8.98120403289795, "entropy": 12.484696388244629, "kl": 0.015201661735773087, "total_loss": 8.874873161315918}, "sample_time_ms": 215402.317, "grad_time_ms": 696.168, "load_time_ms": 1.386, "update_time_ms": 2.557}, "timesteps_total": 326400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 326400, "hostname": "cda-server-3", "episode_reward_max": -50.37665546265208}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 66390.12393069267, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -84.43411533360964, "iterations_since_restore": 273, "episodes_total": 6552, "timestamp": 1756460809, "episode_reward_mean": -53.24081358545004, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_11-46-49", "policy_reward_mean": {}, "time_this_iter_s": 193.66727018356323, "episodes_this_iter": 24, "training_iteration": 273, "time_total_s": 66390.12393069267, "info": {"num_steps_sampled": 327600, "num_steps_trained": 327600, "default": {"policy_loss": -0.1288047730922699, "vf_explained_var": 0.9695960283279419, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 4.136238098144531, "entropy": 12.348140716552734, "kl": 0.016651269048452377, "total_loss": 4.024292945861816}, "sample_time_ms": 210596.173, "grad_time_ms": 696.955, "load_time_ms": 1.392, "update_time_ms": 2.56}, "timesteps_total": 327600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 327600, "hostname": "cda-server-3", "episode_reward_max": -50.37665546265208}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 66652.66490268707, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -78.3542301798375, "iterations_since_restore": 274, "episodes_total": 6576, "timestamp": 1756461071, "episode_reward_mean": -52.8686931909312, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_11-51-11", "policy_reward_mean": {}, "time_this_iter_s": 262.5409719944, "episodes_this_iter": 24, "training_iteration": 274, "time_total_s": 66652.66490268707, "info": {"num_steps_sampled": 328800, "num_steps_trained": 328800, "default": {"policy_loss": -0.140442356467247, "vf_explained_var": 0.96639484167099, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 4.548933506011963, "entropy": 12.452632904052734, "kl": 0.018309663981199265, "total_loss": 4.427030086517334}, "sample_time_ms": 215996.626, "grad_time_ms": 697.463, "load_time_ms": 1.403, "update_time_ms": 2.545}, "timesteps_total": 328800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 1.0, 
"num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 328800, "hostname": "cda-server-3", "episode_reward_max": -50.37665546265208}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 66875.22850847244, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -78.3542301798375, "iterations_since_restore": 275, "episodes_total": 6600, "timestamp": 1756461294, "episode_reward_mean": -52.76412100550012, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_11-54-54", "policy_reward_mean": {}, "time_this_iter_s": 222.56360578536987, "episodes_this_iter": 24, "training_iteration": 275, "time_total_s": 66875.22850847244, "info": {"num_steps_sampled": 330000, "num_steps_trained": 330000, "default": {"policy_loss": -0.13039201498031616, "vf_explained_var": 0.9707435965538025, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 3.792579174041748, "entropy": 12.545645713806152, "kl": 0.017142174765467644, "total_loss": 3.6795437335968018}, "sample_time_ms": 216436.407, "grad_time_ms": 697.167, "load_time_ms": 1.409, "update_time_ms": 2.529}, "timesteps_total": 330000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 330000, "hostname": "cda-server-3", "episode_reward_max": -50.67591107693649}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 67104.32276844978, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -71.39806989782852, "iterations_since_restore": 276, "episodes_total": 6624, "timestamp": 1756461523, "episode_reward_mean": -52.62207614615324, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_11-58-43", "policy_reward_mean": {}, "time_this_iter_s": 229.0942599773407, "episodes_this_iter": 24, "training_iteration": 276, "time_total_s": 67104.32276844978, "info": {"num_steps_sampled": 331200, "num_steps_trained": 331200, "default": {"policy_loss": -0.1321687251329422, "vf_explained_var": 0.9725034236907959, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 4.033144474029541, "entropy": 12.464203834533691, "kl": 0.01608506217598915, "total_loss": 3.917262077331543}, "sample_time_ms": 216582.319, "grad_time_ms": 698.56, "load_time_ms": 1.5, "update_time_ms": 2.511}, "timesteps_total": 331200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 331200, "hostname": "cda-server-3", "episode_reward_max": -50.67591107693649}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 67301.52805280685, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -86.9196750907215, "iterations_since_restore": 277, "episodes_total": 6648, "timestamp": 1756461720, "episode_reward_mean": -53.180265980102625, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_12-02-00", "policy_reward_mean": {}, "time_this_iter_s": 197.20528435707092, "episodes_this_iter": 24, "training_iteration": 277, "time_total_s": 67301.52805280685, "info": {"num_steps_sampled": 332400, "num_steps_trained": 332400, "default": {"policy_loss": -0.1267719715833664, "vf_explained_var": 0.9339027404785156, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 13.914371490478516, "entropy": 12.239913940429688, "kl": 0.014391104690730572, "total_loss": 13.80217170715332}, "sample_time_ms": 217012.603, "grad_time_ms": 698.911, "load_time_ms": 1.494, "update_time_ms": 2.502}, "timesteps_total": 332400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 332400, "hostname": "cda-server-3", "episode_reward_max": -49.05128504421615}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 67517.49462890625, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -86.9196750907215, "iterations_since_restore": 278, "episodes_total": 6672, "timestamp": 1756461936, "episode_reward_mean": -53.126583249710436, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_12-05-36", "policy_reward_mean": {}, "time_this_iter_s": 215.96657609939575, "episodes_this_iter": 24, "training_iteration": 278, "time_total_s": 67517.49462890625, "info": {"num_steps_sampled": 333600, "num_steps_trained": 333600, "default": {"policy_loss": -0.1337561011314392, "vf_explained_var": 0.957696259021759, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 6.064571380615234, "entropy": 12.279397964477539, "kl": 0.01595686562359333, "total_loss": 5.946971893310547}, "sample_time_ms": 215758.99, "grad_time_ms": 698.815, "load_time_ms": 1.508, "update_time_ms": 2.525}, "timesteps_total": 333600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 333600, "hostname": "cda-server-3", "episode_reward_max": -49.05128504421615}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 67772.74753212929, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -86.9196750907215, "iterations_since_restore": 279, "episodes_total": 6696, "timestamp": 1756462191, "episode_reward_mean": -53.49961996527838, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_12-09-51", "policy_reward_mean": {}, "time_this_iter_s": 255.25290322303772, "episodes_this_iter": 24, "training_iteration": 279, "time_total_s": 67772.74753212929, "info": {"num_steps_sampled": 334800, "num_steps_trained": 334800, "default": {"policy_loss": -0.1112731322646141, "vf_explained_var": 0.9488842487335205, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 7.727088928222656, "entropy": 12.261554718017578, "kl": 0.013475686311721802, "total_loss": 7.629459857940674}, "sample_time_ms": 220834.503, "grad_time_ms": 699.083, "load_time_ms": 1.506, "update_time_ms": 2.619}, "timesteps_total": 334800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 334800, "hostname": "cda-server-3", "episode_reward_max": -49.05128504421615}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 67992.6490688324, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -86.9196750907215, "iterations_since_restore": 280, "episodes_total": 6720, "timestamp": 1756462411, "episode_reward_mean": -53.34118500330564, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_12-13-31", "policy_reward_mean": {}, "time_this_iter_s": 219.90153670310974, "episodes_this_iter": 24, "training_iteration": 280, "time_total_s": 67992.6490688324, "info": {"num_steps_sampled": 336000, "num_steps_trained": 336000, "default": {"policy_loss": -0.14288152754306793, "vf_explained_var": 0.9715897440910339, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 3.7593188285827637, "entropy": 12.291492462158203, "kl": 0.016825037077069283, "total_loss": 3.6334729194641113}, "sample_time_ms": 221991.215, "grad_time_ms": 700.27, "load_time_ms": 1.564, "update_time_ms": 2.64}, "timesteps_total": 336000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 336000, "hostname": "cda-server-3", "episode_reward_max": -49.05128504421615}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 68177.69842720032, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -72.29489230435841, "iterations_since_restore": 281, "episodes_total": 6744, "timestamp": 1756462596, "episode_reward_mean": -52.63661777018459, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_12-16-36", "policy_reward_mean": {}, "time_this_iter_s": 185.04935836791992, "episodes_this_iter": 24, "training_iteration": 281, "time_total_s": 68177.69842720032, "info": {"num_steps_sampled": 337200, "num_steps_trained": 337200, "default": {"policy_loss": -0.12455210089683533, "vf_explained_var": 0.972270667552948, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 3.6177749633789062, "entropy": 12.42072582244873, "kl": 0.015108318999409676, "total_loss": 3.5085201263427734}, "sample_time_ms": 218244.897, "grad_time_ms": 701.468, "load_time_ms": 1.665, "update_time_ms": 2.651}, "timesteps_total": 337200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 337200, "hostname": "cda-server-3", "episode_reward_max": -51.17460335793359}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 68388.46821856499, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -78.03348874757802, "iterations_since_restore": 282, "episodes_total": 6768, "timestamp": 1756462807, "episode_reward_mean": -52.87258603097264, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_12-20-07", "policy_reward_mean": {}, "time_this_iter_s": 210.7697913646698, "episodes_this_iter": 24, "training_iteration": 282, "time_total_s": 68388.46821856499, "info": {"num_steps_sampled": 338400, "num_steps_trained": 338400, "default": {"policy_loss": -0.11916964501142502, "vf_explained_var": 0.9402625560760498, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 11.273205757141113, "entropy": 12.366950988769531, "kl": 0.012861553579568863, "total_loss": 11.167058944702148}, "sample_time_ms": 218490.18, "grad_time_ms": 701.265, "load_time_ms": 1.714, "update_time_ms": 2.677}, "timesteps_total": 338400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 338400, "hostname": "cda-server-3", "episode_reward_max": -51.17460335793359}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 68587.79872131348, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -78.03348874757802, "iterations_since_restore": 283, "episodes_total": 6792, "timestamp": 1756463006, "episode_reward_mean": -52.52309877177729, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_12-23-26", "policy_reward_mean": {}, "time_this_iter_s": 199.33050274848938, "episodes_this_iter": 24, "training_iteration": 283, "time_total_s": 68587.79872131348, "info": {"num_steps_sampled": 339600, "num_steps_trained": 339600, "default": {"policy_loss": -0.13121232390403748, "vf_explained_var": 0.9545206427574158, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 6.784745693206787, "entropy": 12.493606567382812, "kl": 0.014576302841305733, "total_loss": 6.668292045593262}, "sample_time_ms": 219056.931, "grad_time_ms": 700.902, "load_time_ms": 1.7, "update_time_ms": 2.642}, "timesteps_total": 339600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 339600, "hostname": "cda-server-3", "episode_reward_max": -48.200563271012534}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 68831.42337942123, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -78.03348874757802, "iterations_since_restore": 284, "episodes_total": 6816, "timestamp": 1756463250, "episode_reward_mean": -52.34025074477718, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_12-27-30", "policy_reward_mean": {}, "time_this_iter_s": 243.62465810775757, "episodes_this_iter": 24, "training_iteration": 284, "time_total_s": 68831.42337942123, "info": {"num_steps_sampled": 340800, "num_steps_trained": 340800, "default": {"policy_loss": -0.1302146315574646, "vf_explained_var": 0.9693320393562317, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 3.9507997035980225, "entropy": 12.259281158447266, "kl": 0.015657953917980194, "total_loss": 3.8364388942718506}, "sample_time_ms": 217165.955, "grad_time_ms": 700.227, "load_time_ms": 1.689, "update_time_ms": 2.648}, "timesteps_total": 340800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 340800, "hostname": "cda-server-3", "episode_reward_max": -48.200563271012534}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 69071.45431423187, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -78.03348874757802, "iterations_since_restore": 285, "episodes_total": 6840, "timestamp": 1756463490, "episode_reward_mean": -52.354673693428815, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_12-31-30", "policy_reward_mean": {}, "time_this_iter_s": 240.03093481063843, "episodes_this_iter": 24, "training_iteration": 285, "time_total_s": 69071.45431423187, "info": {"num_steps_sampled": 342000, "num_steps_trained": 342000, "default": {"policy_loss": -0.1259268820285797, "vf_explained_var": 0.9753335118293762, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 3.3924663066864014, "entropy": 12.422459602355957, "kl": 0.016515301540493965, "total_loss": 3.283261299133301}, "sample_time_ms": 218912.421, "grad_time_ms": 700.466, "load_time_ms": 1.708, "update_time_ms": 2.687}, "timesteps_total": 342000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 342000, "hostname": "cda-server-3", "episode_reward_max": -48.200563271012534}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 69299.64997696877, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -58.96335390541665, "iterations_since_restore": 286, "episodes_total": 6864, "timestamp": 1756463718, "episode_reward_mean": -52.059089461532785, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_12-35-18", "policy_reward_mean": {}, "time_this_iter_s": 228.1956627368927, "episodes_this_iter": 24, "training_iteration": 286, "time_total_s": 69299.64997696877, "info": {"num_steps_sampled": 343200, "num_steps_trained": 343200, "default": {"policy_loss": -0.13080231845378876, "vf_explained_var": 0.9585863947868347, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 5.377427101135254, "entropy": 12.568329811096191, "kl": 0.016125712543725967, "total_loss": 5.262951850891113}, "sample_time_ms": 218823.497, "grad_time_ms": 699.55, "load_time_ms": 1.7, "update_time_ms": 2.679}, "timesteps_total": 343200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 343200, "hostname": "cda-server-3", "episode_reward_max": -48.200563271012534}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 69511.73801374435, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -58.96335390541665, "iterations_since_restore": 287, "episodes_total": 6888, "timestamp": 1756463930, "episode_reward_mean": -52.12903332033729, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_12-38-50", "policy_reward_mean": {}, "time_this_iter_s": 212.088036775589, "episodes_this_iter": 24, "training_iteration": 287, "time_total_s": 69511.73801374435, "info": {"num_steps_sampled": 344400, "num_steps_trained": 344400, "default": {"policy_loss": -0.12991659343242645, "vf_explained_var": 0.9701218008995056, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 3.9993748664855957, "entropy": 12.348908424377441, "kl": 0.014908598735928535, "total_loss": 3.8845536708831787}, "sample_time_ms": 220312.014, "grad_time_ms": 699.283, "load_time_ms": 1.701, "update_time_ms": 2.656}, "timesteps_total": 344400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 344400, "hostname": "cda-server-3", "episode_reward_max": -49.00580362206023}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 69728.40817785263, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -58.96335390541665, "iterations_since_restore": 288, "episodes_total": 6912, "timestamp": 1756464147, "episode_reward_mean": -52.207700063283, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_12-42-27", "policy_reward_mean": {}, "time_this_iter_s": 216.67016410827637, "episodes_this_iter": 24, "training_iteration": 288, "time_total_s": 69728.40817785263, "info": {"num_steps_sampled": 345600, "num_steps_trained": 345600, "default": {"policy_loss": -0.13974148035049438, "vf_explained_var": 0.9652450084686279, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 4.558967590332031, "entropy": 12.57193374633789, "kl": 0.017402615398168564, "total_loss": 4.4368462562561035}, "sample_time_ms": 220382.521, "grad_time_ms": 699.166, "load_time_ms": 1.696, "update_time_ms": 2.646}, "timesteps_total": 345600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 345600, "hostname": "cda-server-3", "episode_reward_max": -48.96425296443912}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 69953.82830810547, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -93.07593011966938, "iterations_since_restore": 289, "episodes_total": 6936, "timestamp": 1756464372, "episode_reward_mean": -52.56730745048848, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_12-46-12", "policy_reward_mean": {}, "time_this_iter_s": 225.42013025283813, "episodes_this_iter": 24, "training_iteration": 289, "time_total_s": 69953.82830810547, "info": {"num_steps_sampled": 346800, "num_steps_trained": 346800, "default": {"policy_loss": -0.10083112120628357, "vf_explained_var": 0.8962631225585938, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 20.528160095214844, "entropy": 12.43372631072998, "kl": 0.011682498268783092, "total_loss": 20.439159393310547}, "sample_time_ms": 217399.421, "grad_time_ms": 699.073, "load_time_ms": 1.708, "update_time_ms": 2.537}, "timesteps_total": 346800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 346800, "hostname": "cda-server-3", "episode_reward_max": -48.96425296443912}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 70200.05345344543, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -93.07593011966938, "iterations_since_restore": 290, "episodes_total": 6960, "timestamp": 1756464619, "episode_reward_mean": -53.04621499979686, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_12-50-19", "policy_reward_mean": {}, "time_this_iter_s": 246.22514533996582, "episodes_this_iter": 24, "training_iteration": 290, "time_total_s": 70200.05345344543, "info": {"num_steps_sampled": 348000, "num_steps_trained": 348000, "default": {"policy_loss": -0.11693020910024643, "vf_explained_var": 0.8908771872520447, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 26.90326690673828, "entropy": 12.324344635009766, "kl": 0.011365074664354324, "total_loss": 26.79784393310547}, "sample_time_ms": 220032.701, "grad_time_ms": 698.208, "load_time_ms": 1.683, "update_time_ms": 2.529}, "timesteps_total": 348000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 348000, "hostname": "cda-server-3", "episode_reward_max": -48.96425296443912}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 70410.31812143326, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -93.07593011966938, "iterations_since_restore": 291, "episodes_total": 6984, "timestamp": 1756464829, "episode_reward_mean": -53.13178389336556, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_12-53-49", "policy_reward_mean": {}, "time_this_iter_s": 210.2646679878235, "episodes_this_iter": 24, "training_iteration": 291, "time_total_s": 70410.31812143326, "info": {"num_steps_sampled": 349200, "num_steps_trained": 349200, "default": {"policy_loss": -0.13924799859523773, "vf_explained_var": 0.967040479183197, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 4.398654937744141, "entropy": 12.41601848602295, "kl": 0.01773855648934841, "total_loss": 4.27736759185791}, "sample_time_ms": 222554.442, "grad_time_ms": 697.992, "load_time_ms": 1.693, "update_time_ms": 2.536}, "timesteps_total": 349200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 349200, "hostname": "cda-server-3", "episode_reward_max": -48.96425296443912}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 70627.03892922401, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -93.07593011966938, "iterations_since_restore": 292, "episodes_total": 7008, "timestamp": 1756465046, "episode_reward_mean": -53.17242746128206, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_12-57-26", "policy_reward_mean": {}, "time_this_iter_s": 216.72080779075623, "episodes_this_iter": 24, "training_iteration": 292, "time_total_s": 70627.03892922401, "info": {"num_steps_sampled": 350400, "num_steps_trained": 350400, "default": {"policy_loss": -0.12980133295059204, "vf_explained_var": 0.968231737613678, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 4.2248711585998535, "entropy": 12.26395034790039, "kl": 0.01560777798295021, "total_loss": 4.110872745513916}, "sample_time_ms": 223150.609, "grad_time_ms": 697.043, "load_time_ms": 1.661, "update_time_ms": 2.506}, "timesteps_total": 350400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 350400, "hostname": "cda-server-3", "episode_reward_max": -49.349361346491975}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 70878.18247318268, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -90.1025840196358, "iterations_since_restore": 293, "episodes_total": 7032, "timestamp": 1756465297, "episode_reward_mean": -52.778861706694286, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_13-01-37", "policy_reward_mean": {}, "time_this_iter_s": 251.14354395866394, "episodes_this_iter": 24, "training_iteration": 293, "time_total_s": 70878.18247318268, "info": {"num_steps_sampled": 351600, "num_steps_trained": 351600, "default": {"policy_loss": -0.14350180327892303, "vf_explained_var": 0.9764432907104492, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 3.0745840072631836, "entropy": 12.347380638122559, "kl": 0.016932280734181404, "total_loss": 2.9482264518737793}, "sample_time_ms": 228332.209, "grad_time_ms": 696.889, "load_time_ms": 1.593, "update_time_ms": 2.5}, "timesteps_total": 351600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 351600, "hostname": "cda-server-3", "episode_reward_max": -49.349361346491975}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 71095.06284427643, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -61.96288074410334, "iterations_since_restore": 294, "episodes_total": 7056, "timestamp": 1756465514, "episode_reward_mean": -52.413159036952635, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_13-05-14", "policy_reward_mean": {}, "time_this_iter_s": 216.88037109375, "episodes_this_iter": 24, "training_iteration": 294, "time_total_s": 71095.06284427643, "info": {"num_steps_sampled": 352800, "num_steps_trained": 352800, "default": {"policy_loss": -0.1359146684408188, "vf_explained_var": 0.9747341871261597, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 3.547532796859741, "entropy": 12.351963996887207, "kl": 0.01467402745038271, "total_loss": 3.4264755249023438}, "sample_time_ms": 225658.221, "grad_time_ms": 696.484, "load_time_ms": 1.581, "update_time_ms": 2.531}, "timesteps_total": 352800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 352800, "hostname": "cda-server-3", "episode_reward_max": -50.951736905180546}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 71324.26915335655, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -58.954857103882475, "iterations_since_restore": 295, "episodes_total": 7080, "timestamp": 1756465743, "episode_reward_mean": -52.2415230325622, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_13-09-03", "policy_reward_mean": {}, "time_this_iter_s": 229.2063090801239, "episodes_this_iter": 24, "training_iteration": 295, "time_total_s": 71324.26915335655, "info": {"num_steps_sampled": 354000, "num_steps_trained": 354000, "default": {"policy_loss": -0.13213542103767395, "vf_explained_var": 0.9717539548873901, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 3.67763352394104, "entropy": 12.306523323059082, "kl": 0.013029721565544605, "total_loss": 3.5586907863616943}, "sample_time_ms": 224574.938, "grad_time_ms": 697.288, "load_time_ms": 1.568, "update_time_ms": 2.525}, "timesteps_total": 354000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 354000, "hostname": "cda-server-3", "episode_reward_max": -50.02923476252851}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 71543.36113262177, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -56.309158459773386, "iterations_since_restore": 296, "episodes_total": 7104, "timestamp": 1756465962, "episode_reward_mean": -52.17289624307936, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_13-12-42", "policy_reward_mean": {}, "time_this_iter_s": 219.091979265213, "episodes_this_iter": 24, "training_iteration": 296, "time_total_s": 71543.36113262177, "info": {"num_steps_sampled": 355200, "num_steps_trained": 355200, "default": {"policy_loss": -0.1349836140871048, "vf_explained_var": 0.9557469487190247, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 5.835244178771973, "entropy": 12.222107887268066, "kl": 0.01717188209295273, "total_loss": 5.717647075653076}, "sample_time_ms": 223664.96, "grad_time_ms": 696.93, "load_time_ms": 1.529, "update_time_ms": 2.564}, "timesteps_total": 355200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 355200, "hostname": "cda-server-3", "episode_reward_max": -46.895097690446974}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 71764.0941464901, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -96.42450975252484, "iterations_since_restore": 297, "episodes_total": 7128, "timestamp": 1756466183, "episode_reward_mean": -52.551490195022886, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_13-16-23", "policy_reward_mean": {}, "time_this_iter_s": 220.7330138683319, "episodes_this_iter": 24, "training_iteration": 297, "time_total_s": 71764.0941464901, "info": {"num_steps_sampled": 356400, "num_steps_trained": 356400, "default": {"policy_loss": -0.10262128710746765, "vf_explained_var": 0.9111029505729675, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 16.357921600341797, "entropy": 12.320260047912598, "kl": 0.011174225248396397, "total_loss": 16.266613006591797}, "sample_time_ms": 224530.074, "grad_time_ms": 696.329, "load_time_ms": 1.538, "update_time_ms": 2.562}, "timesteps_total": 356400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 356400, "hostname": "cda-server-3", "episode_reward_max": -46.895097690446974}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 71956.04703903198, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -96.42450975252484, "iterations_since_restore": 298, "episodes_total": 7152, "timestamp": 1756466375, "episode_reward_mean": -52.72924235526778, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_13-19-35", "policy_reward_mean": {}, "time_this_iter_s": 191.95289254188538, "episodes_this_iter": 24, "training_iteration": 298, "time_total_s": 71956.04703903198, "info": {"num_steps_sampled": 357600, "num_steps_trained": 357600, "default": {"policy_loss": -0.13917165994644165, "vf_explained_var": 0.9431633353233337, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 8.362993240356445, "entropy": 12.179462432861328, "kl": 0.016141919419169426, "total_loss": 8.240165710449219}, "sample_time_ms": 222058.576, "grad_time_ms": 696.136, "load_time_ms": 1.528, "update_time_ms": 2.57}, "timesteps_total": 357600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 357600, "hostname": "cda-server-3", "episode_reward_max": -46.895097690446974}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 72197.96976613998, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -96.42450975252484, "iterations_since_restore": 299, "episodes_total": 7176, "timestamp": 1756466617, "episode_reward_mean": -53.04186033195124, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_13-23-37", "policy_reward_mean": {}, "time_this_iter_s": 241.9227271080017, "episodes_this_iter": 24, "training_iteration": 299, "time_total_s": 72197.96976613998, "info": {"num_steps_sampled": 358800, "num_steps_trained": 358800, "default": {"policy_loss": -0.13167642056941986, "vf_explained_var": 0.9527842402458191, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 7.93591833114624, "entropy": 12.2572660446167, "kl": 0.013497140258550644, "total_loss": 7.81790828704834}, "sample_time_ms": 223709.85, "grad_time_ms": 695.264, "load_time_ms": 1.463, "update_time_ms": 2.553}, "timesteps_total": 358800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, "lambda": 
1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 358800, "hostname": "cda-server-3", "episode_reward_max": -46.895097690446974}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 72459.24091768265, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -99.06848104185677, "iterations_since_restore": 300, "episodes_total": 7200, "timestamp": 1756466878, "episode_reward_mean": -53.682056333677174, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_13-27-58", "policy_reward_mean": {}, "time_this_iter_s": 261.2711515426636, "episodes_this_iter": 24, "training_iteration": 300, "time_total_s": 72459.24091768265, "info": {"num_steps_sampled": 360000, "num_steps_trained": 360000, "default": {"policy_loss": -0.13141396641731262, "vf_explained_var": 0.9265610575675964, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 13.346891403198242, "entropy": 12.274971961975098, "kl": 0.015094866044819355, "total_loss": 13.230761528015137}, "sample_time_ms": 225215.112, "grad_time_ms": 694.581, "load_time_ms": 1.466, "update_time_ms": 2.564}, "timesteps_total": 360000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 360000, "hostname": "cda-server-3", "episode_reward_max": -48.35379212325632}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 72683.35219526291, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -99.17453996516333, "iterations_since_restore": 301, "episodes_total": 7224, "timestamp": 1756467102, "episode_reward_mean": -53.750167833270154, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_13-31-42", "policy_reward_mean": {}, "time_this_iter_s": 224.11127758026123, "episodes_this_iter": 24, "training_iteration": 301, "time_total_s": 72683.35219526291, "info": {"num_steps_sampled": 361200, "num_steps_trained": 361200, "default": {"policy_loss": -0.1376352310180664, "vf_explained_var": 0.9457657933235168, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 9.347111701965332, "entropy": 12.26346206665039, "kl": 0.01422292459756136, "total_loss": 9.223877906799316}, "sample_time_ms": 226600.323, "grad_time_ms": 694.031, "load_time_ms": 1.449, "update_time_ms": 2.571}, "timesteps_total": 361200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 361200, "hostname": "cda-server-3", "episode_reward_max": -48.35379212325632}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 72948.92363882065, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -99.17453996516333, "iterations_since_restore": 302, "episodes_total": 7248, "timestamp": 1756467368, "episode_reward_mean": -53.607525853227514, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_13-36-08", "policy_reward_mean": {}, "time_this_iter_s": 265.57144355773926, "episodes_this_iter": 24, "training_iteration": 302, "time_total_s": 72948.92363882065, "info": {"num_steps_sampled": 362400, "num_steps_trained": 362400, "default": {"policy_loss": -0.1350909024477005, "vf_explained_var": 0.9636004567146301, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 4.889998435974121, "entropy": 12.176090240478516, "kl": 0.01710471510887146, "total_loss": 4.772226333618164}, "sample_time_ms": 231485.525, "grad_time_ms": 693.899, "load_time_ms": 1.453, "update_time_ms": 2.591}, "timesteps_total": 362400, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 362400, "hostname": "cda-server-3", "episode_reward_max": -48.35379212325632}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 73200.5001718998, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -99.17453996516333, "iterations_since_restore": 303, "episodes_total": 7272, "timestamp": 1756467619, "episode_reward_mean": -53.29212436288077, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_13-40-19", "policy_reward_mean": {}, "time_this_iter_s": 251.57653307914734, "episodes_this_iter": 24, "training_iteration": 303, "time_total_s": 73200.5001718998, "info": {"num_steps_sampled": 363600, "num_steps_trained": 363600, "default": {"policy_loss": -0.1493072360754013, "vf_explained_var": 0.9692809581756592, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 4.034963607788086, "entropy": 12.088041305541992, "kl": 0.016667162999510765, "total_loss": 3.902531623840332}, "sample_time_ms": 231528.872, "grad_time_ms": 693.734, "load_time_ms": 1.515, "update_time_ms": 2.606}, "timesteps_total": 363600, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 363600, "hostname": "cda-server-3", "episode_reward_max": -50.15217415635844}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 73480.11277294159, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -99.17453996516333, "iterations_since_restore": 304, "episodes_total": 7296, "timestamp": 1756467899, "episode_reward_mean": -52.89721532106234, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_13-44-59", "policy_reward_mean": {}, "time_this_iter_s": 279.6126010417938, "episodes_this_iter": 24, "training_iteration": 304, "time_total_s": 73480.11277294159, "info": {"num_steps_sampled": 364800, "num_steps_trained": 364800, "default": {"policy_loss": -0.11796130239963531, "vf_explained_var": 0.9274365305900574, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 10.442138671875, "entropy": 12.077159881591797, "kl": 0.013736975379288197, "total_loss": 10.338085174560547}, "sample_time_ms": 237800.827, "grad_time_ms": 694.929, "load_time_ms": 1.529, "update_time_ms": 2.62}, "timesteps_total": 364800, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 364800, "hostname": "cda-server-3", "episode_reward_max": -49.92233445051167}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 73758.79197740555, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -67.96758924730126, "iterations_since_restore": 305, "episodes_total": 7320, "timestamp": 1756468178, "episode_reward_mean": -52.48637423160558, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_13-49-38", "policy_reward_mean": {}, "time_this_iter_s": 278.67920446395874, "episodes_this_iter": 24, "training_iteration": 305, "time_total_s": 73758.79197740555, "info": {"num_steps_sampled": 366000, "num_steps_trained": 366000, "default": {"policy_loss": -0.12833836674690247, "vf_explained_var": 0.9781603217124939, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 2.9315381050109863, "entropy": 11.94629955291748, "kl": 0.015499631874263287, "total_loss": 2.8188929557800293}, "sample_time_ms": 242748.703, "grad_time_ms": 694.368, "load_time_ms": 1.537, "update_time_ms": 2.623}, "timesteps_total": 366000, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 366000, "hostname": "cda-server-3", "episode_reward_max": -49.92233445051167}
+{"experiment_id": "7ffa6ff4607a442eb508661143530d5b", "time_since_restore": 73987.31811928749, "episode_len_mean": 50.0, "done": false, "episode_reward_min": -67.96758924730126, "iterations_since_restore": 306, "episodes_total": 7344, "timestamp": 1756468406, "episode_reward_mean": -52.524258915937445, "pid": 1566858, "timesteps_this_iter": 1200, "date": "2025-08-29_13-53-26", "policy_reward_mean": {}, "time_this_iter_s": 228.52614188194275, "episodes_this_iter": 24, "training_iteration": 306, "time_total_s": 73987.31811928749, "info": {"num_steps_sampled": 367200, "num_steps_trained": 367200, "default": {"policy_loss": -0.12210477888584137, "vf_explained_var": 0.9534997940063477, "cur_lr": 4.999999873689376e-05, "cur_kl_coeff": 1.0125000476837158, "vf_loss": 6.509614944458008, "entropy": 11.96830940246582, "kl": 0.012901661917567253, "total_loss": 6.400572776794434}, "sample_time_ms": 243691.988, "grad_time_ms": 694.451, "load_time_ms": 1.575, "update_time_ms": 2.593}, "timesteps_total": 367200, "config": {"input": "sampler", "simple_optimizer": false, "tf_session_args": {"allow_soft_placement": true, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2}, "postprocess_inputs": false, "observation_filter": "MeanStdFilter", "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "num_gpus": 0, "sgd_minibatch_size": 128, "clip_param": 0.3, "grad_clip": null, "vf_loss_coeff": 1.0, "gamma": 0.99, "input_evaluation": null, "sample_async": false, "vf_share_layers": false, "sample_batch_size": 200, "output_max_file_size": 67108864, "preprocessor_pref": "deepmind", "env_config": {"generalize": true, "run_valid": false}, "clip_actions": true, "kl_coeff": 0.2, "num_envs_per_worker": 1, "monitor": false, "kl_target": 0.01, "env": "LEDRO_D_FC", "lr_schedule": null, "lr": 5e-05, "collect_metrics_timeout": 180, 
"lambda": 1.0, "num_workers": 3, "num_cpus_for_driver": 1, "custom_resources_per_worker": {}, "multiagent": {"policy_graphs": {}, "policies_to_train": null, "policy_mapping_fn": null}, "train_batch_size": 1200, "callbacks": {"on_episode_step": null, "on_sample_end": null, "on_episode_start": null, "on_train_result": null, "on_episode_end": null}, "optimizer": {}, "num_gpus_per_worker": 0, "entropy_coeff": 0.0, "num_cpus_per_worker": 1, "synchronize_filters": true, "output_compress_columns": ["obs", "new_obs"], "clip_rewards": null, "log_level": "INFO", "compress_observations": false, "model": {"conv_activation": "relu", "free_log_std": false, "fcnet_activation": "tanh", "lstm_use_prev_action_reward": false, "conv_filters": null, "lstm_cell_size": 256, "use_lstm": false, "grayscale": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "custom_options": {}, "framestack": true, "custom_preprocessor": null, "custom_model": null, "zero_mean": true, "squash_to_range": false}, "batch_mode": "truncate_episodes", "vf_clip_param": 10.0, "num_sgd_iter": 30, "horizon": 50, "straggler_mitigation": false, "output": null, "use_gae": true}, "node_ip": "10.157.146.3", "num_metric_batches_dropped": 0, "custom_metrics": {}, "timesteps_since_restore": 367200, "hostname": "cda-server-3", "episode_reward_max": -48.9968998602918}
diff --git a/experiments/ledro_d_fc_7nm_run4_horizon_50_range_10_400_400_start_33/PPO_LEDRO_D_FC_0_2025-08-29_14-36-17yoqm6ky6/events.out.tfevents.1756471232.cda-server-6 b/experiments/ledro_d_fc_7nm_run4_horizon_50_range_10_400_400_start_33/PPO_LEDRO_D_FC_0_2025-08-29_14-36-17yoqm6ky6/events.out.tfevents.1756471232.cda-server-6
new file mode 100644
index 0000000..7f039eb
Binary files /dev/null and b/experiments/ledro_d_fc_7nm_run4_horizon_50_range_10_400_400_start_33/PPO_LEDRO_D_FC_0_2025-08-29_14-36-17yoqm6ky6/events.out.tfevents.1756471232.cda-server-6 differ
diff --git a/experiments/ledro_d_fc_7nm_run4_horizon_50_range_10_400_400_start_33/PPO_LEDRO_D_FC_0_2025-08-29_14-36-17yoqm6ky6/final_checkpoint/checkpoint-690 b/experiments/ledro_d_fc_7nm_run4_horizon_50_range_10_400_400_start_33/PPO_LEDRO_D_FC_0_2025-08-29_14-36-17yoqm6ky6/final_checkpoint/checkpoint-690
new file mode 100644
index 0000000..54039f8
Binary files /dev/null and b/experiments/ledro_d_fc_7nm_run4_horizon_50_range_10_400_400_start_33/PPO_LEDRO_D_FC_0_2025-08-29_14-36-17yoqm6ky6/final_checkpoint/checkpoint-690 differ
diff --git a/experiments/ledro_d_fc_7nm_run4_horizon_50_range_10_400_400_start_33/PPO_LEDRO_D_FC_0_2025-08-29_14-36-17yoqm6ky6/final_checkpoint/checkpoint-690.tune_metadata b/experiments/ledro_d_fc_7nm_run4_horizon_50_range_10_400_400_start_33/PPO_LEDRO_D_FC_0_2025-08-29_14-36-17yoqm6ky6/final_checkpoint/checkpoint-690.tune_metadata
new file mode 100644
index 0000000..a9bb302
Binary files /dev/null and b/experiments/ledro_d_fc_7nm_run4_horizon_50_range_10_400_400_start_33/PPO_LEDRO_D_FC_0_2025-08-29_14-36-17yoqm6ky6/final_checkpoint/checkpoint-690.tune_metadata differ
diff --git a/experiments/ledro_d_fc_7nm_run4_horizon_50_range_10_400_400_start_33/PPO_LEDRO_D_FC_0_2025-08-29_14-36-17yoqm6ky6/params.json b/experiments/ledro_d_fc_7nm_run4_horizon_50_range_10_400_400_start_33/PPO_LEDRO_D_FC_0_2025-08-29_14-36-17yoqm6ky6/params.json
new file mode 100644
index 0000000..6b8cc2b
--- /dev/null
+++ b/experiments/ledro_d_fc_7nm_run4_horizon_50_range_10_400_400_start_33/PPO_LEDRO_D_FC_0_2025-08-29_14-36-17yoqm6ky6/params.json
@@ -0,0 +1,18 @@
+{
+ "env": "",
+ "env_config": {
+ "generalize": true,
+ "run_valid": false
+ },
+ "horizon": 50,
+ "model": {
+ "fcnet_hiddens": [
+ 128,
+ 128,
+ 128
+ ]
+ },
+ "num_gpus": 0,
+ "num_workers": 6,
+ "train_batch_size": 1200
+}
\ No newline at end of file
diff --git a/experiments/ledro_d_fc_7nm_run4_horizon_50_range_10_400_400_start_33/PPO_LEDRO_D_FC_0_2025-08-29_14-36-17yoqm6ky6/params.pkl b/experiments/ledro_d_fc_7nm_run4_horizon_50_range_10_400_400_start_33/PPO_LEDRO_D_FC_0_2025-08-29_14-36-17yoqm6ky6/params.pkl
new file mode 100644
index 0000000..66bf083
Binary files /dev/null and b/experiments/ledro_d_fc_7nm_run4_horizon_50_range_10_400_400_start_33/PPO_LEDRO_D_FC_0_2025-08-29_14-36-17yoqm6ky6/params.pkl differ
diff --git a/experiments/ledro_d_fc_7nm_run4_horizon_50_range_10_400_400_start_33/PPO_LEDRO_D_FC_0_2025-08-29_14-36-17yoqm6ky6/progress.csv b/experiments/ledro_d_fc_7nm_run4_horizon_50_range_10_400_400_start_33/PPO_LEDRO_D_FC_0_2025-08-29_14-36-17yoqm6ky6/progress.csv
new file mode 100644
index 0000000..ad46112
--- /dev/null
+++ b/experiments/ledro_d_fc_7nm_run4_horizon_50_range_10_400_400_start_33/PPO_LEDRO_D_FC_0_2025-08-29_14-36-17yoqm6ky6/progress.csv
@@ -0,0 +1,700 @@
+timesteps_total,experiment_id,done,info,pid,time_total_s,episode_reward_mean,hostname,episodes_this_iter,episode_reward_min,policy_reward_mean,episodes_total,node_ip,custom_metrics,episode_reward_max,num_metric_batches_dropped,timesteps_this_iter,date,training_iteration,config,timestamp,episode_len_mean,timesteps_since_restore,time_since_restore,time_this_iter_s,iterations_since_restore
+1200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 238752.488, 'num_steps_sampled': 1200, 'update_time_ms': 1494.553, 'num_steps_trained': 1200, 'load_time_ms': 91.451, 'default': {'kl': 0.02345726452767849, 'cur_lr': 4.999999873689376e-05, 'entropy': 18.652944564819336, 'total_loss': 13102.5712890625, 'cur_kl_coeff': 0.20000000298023224, 'policy_loss': -0.1253841370344162, 'vf_explained_var': -0.020561866462230682, 'vf_loss': 13102.69140625}, 'grad_time_ms': 1736.076}",3934253,242.16078996658325,-230.22058282191497,cda-server-6,24,-248.50570683339015,{},24,10.157.146.6,{},-189.17222398744005,0,1200,2025-08-29_14-40-32,1,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756471232,50.0,1200,242.16078996658325,242.16078996658325,1
+2400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 178035.505, 'num_steps_sampled': 2400, 'update_time_ms': 749.314, 'num_steps_trained': 2400, 'load_time_ms': 46.26, 'default': {'kl': 0.022505946457386017, 'cur_lr': 4.999999873689376e-05, 'entropy': 18.62984848022461, 'total_loss': 12515.01171875, 'cur_kl_coeff': 0.30000001192092896, 'policy_loss': -0.12131085991859436, 'vf_explained_var': 0.046773672103881836, 'vf_loss': 12515.125}, 'grad_time_ms': 1248.081}",3934253,360.25028228759766,-229.7708322779712,cda-server-6,24,-248.50570683339015,{},48,10.157.146.6,{},-189.17222398744005,0,1200,2025-08-29_14-42-30,2,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 'tanh', 
'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756471350,50.0,2400,360.25028228759766,118.0894923210144,2
+3600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 160903.856, 'num_steps_sampled': 3600, 'update_time_ms': 500.221, 'num_steps_trained': 3600, 'load_time_ms': 31.074, 'default': {'kl': 0.021168239414691925, 'cur_lr': 4.999999873689376e-05, 'entropy': 18.610870361328125, 'total_loss': 12362.8056640625, 'cur_kl_coeff': 0.44999995827674866, 'policy_loss': -0.12194083631038666, 'vf_explained_var': 0.0496826171875, 'vf_loss': 12362.91796875}, 'grad_time_ms': 1005.561}",3934253,487.41902899742126,-230.6458543464934,cda-server-6,24,-248.50570683339015,{},72,10.157.146.6,{},-189.17222398744005,0,1200,2025-08-29_14-44-37,3,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756471477,50.0,3600,487.41902899742126,127.16874670982361,3
+4800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 153296.846, 'num_steps_sampled': 4800, 'update_time_ms': 375.774, 'num_steps_trained': 4800, 'load_time_ms': 23.521, 'default': {'kl': 0.020337438210844994, 'cur_lr': 4.999999873689376e-05, 'entropy': 18.58652687072754, 'total_loss': 12319.80859375, 'cur_kl_coeff': 0.675000011920929, 'policy_loss': -0.12520265579223633, 'vf_explained_var': 0.05312725529074669, 'vf_loss': 12319.919921875}, 'grad_time_ms': 927.15}",3934253,618.5954301357269,-231.9126016253825,cda-server-6,24,-248.50570683339015,{},96,10.157.146.6,{},-189.17222398744005,0,1200,2025-08-29_14-46-49,4,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 'tanh', 
'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756471609,50.0,4800,618.5954301357269,131.17640113830566,4
+6000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 149198.611, 'num_steps_sampled': 6000, 'update_time_ms': 301.15, 'num_steps_trained': 6000, 'load_time_ms': 18.931, 'default': {'kl': 0.017712781205773354, 'cur_lr': 4.999999873689376e-05, 'entropy': 18.564531326293945, 'total_loss': 11199.1513671875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13221319019794464, 'vf_explained_var': 0.030576281249523163, 'vf_loss': 11199.263671875}, 'grad_time_ms': 893.167}",3934253,752.1666700839996,-232.0932859758354,cda-server-6,24,-248.10200411755505,{},120,10.157.146.6,{},-200.99987523969685,0,1200,2025-08-29_14-49-02,5,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756471742,50.0,6000,752.1666700839996,133.5712399482727,5
+7200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 146583.585, 'num_steps_sampled': 7200, 'update_time_ms': 251.309, 'num_steps_trained': 7200, 'load_time_ms': 15.871, 'default': {'kl': 0.01696646213531494, 'cur_lr': 4.999999873689376e-05, 'entropy': 18.5582275390625, 'total_loss': 11126.30859375, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12488171458244324, 'vf_explained_var': 0.012747373431921005, 'vf_loss': 11126.4169921875}, 'grad_time_ms': 863.632}",3934253,886.3993492126465,-233.2256954990561,cda-server-6,24,-248.10200411755505,{},144,10.157.146.6,{},-200.99987523969685,0,1200,2025-08-29_14-51-16,6,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756471876,50.0,7200,886.3993492126465,134.23267912864685,6
+8400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 143834.975, 'num_steps_sampled': 8400, 'update_time_ms': 215.706, 'num_steps_trained': 8400, 'load_time_ms': 13.737, 'default': {'kl': 0.018511280417442322, 'cur_lr': 4.999999873689376e-05, 'entropy': 18.533920288085938, 'total_loss': 10328.9599609375, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12375900149345398, 'vf_explained_var': 0.007189598400145769, 'vf_loss': 10329.0654296875}, 'grad_time_ms': 849.341}",3934253,1014.5144400596619,-232.3667402438889,cda-server-6,24,-248.10200411755505,{},168,10.157.146.6,{},-199.95306198707857,0,1200,2025-08-29_14-53-24,7,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756472004,50.0,8400,1014.5144400596619,128.11509084701538,7
+9600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 141122.885, 'num_steps_sampled': 9600, 'update_time_ms': 189.046, 'num_steps_trained': 9600, 'load_time_ms': 12.13, 'default': {'kl': 0.016686219722032547, 'cur_lr': 4.999999873689376e-05, 'entropy': 18.514015197753906, 'total_loss': 9768.44140625, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1354256272315979, 'vf_explained_var': 0.004887203220278025, 'vf_loss': 9768.5595703125}, 'grad_time_ms': 837.745}",3934253,1137.4176816940308,-230.78368277525772,cda-server-6,24,-247.03019267128656,{},192,10.157.146.6,{},-197.89664123792278,0,1200,2025-08-29_14-55-27,8,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756472127,50.0,9600,1137.4176816940308,122.9032416343689,8
+10800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 140244.999, 'num_steps_sampled': 10800, 'update_time_ms': 168.316, 'num_steps_trained': 10800, 'load_time_ms': 10.847, 'default': {'kl': 0.016497083008289337, 'cur_lr': 4.999999873689376e-05, 'entropy': 18.499237060546875, 'total_loss': 9278.640625, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1313558965921402, 'vf_explained_var': 0.002922866027802229, 'vf_loss': 9278.7548828125}, 'grad_time_ms': 828.448}",3934253,1271.4018051624298,-229.5842021622292,cda-server-6,24,-247.858779289968,{},216,10.157.146.6,{},-197.89664123792278,0,1200,2025-08-29_14-57-41,9,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756472261,50.0,10800,1271.4018051624298,133.98412346839905,9
+12000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 138991.608, 'num_steps_sampled': 12000, 'update_time_ms': 151.715, 'num_steps_trained': 12000, 'load_time_ms': 9.854, 'default': {'kl': 0.018143020570278168, 'cur_lr': 4.999999873689376e-05, 'entropy': 18.48088264465332, 'total_loss': 9191.5791015625, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13637499511241913, 'vf_explained_var': 0.0037107665557414293, 'vf_loss': 9191.697265625}, 'grad_time_ms': 817.367}",3934253,1399.8384637832642,-228.35528496800046,cda-server-6,24,-247.858779289968,{},240,10.157.146.6,{},-195.66306370400125,0,1200,2025-08-29_14-59-50,10,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756472390,50.0,12000,1399.8384637832642,128.43665862083435,10
+13200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 126993.623, 'num_steps_sampled': 13200, 'update_time_ms': 2.449, 'num_steps_trained': 13200, 'load_time_ms': 0.766, 'default': {'kl': 0.016372594982385635, 'cur_lr': 4.999999873689376e-05, 'entropy': 18.44902229309082, 'total_loss': 8664.150390625, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12930560111999512, 'vf_explained_var': 0.014810138382017612, 'vf_loss': 8664.2626953125}, 'grad_time_ms': 693.016}",3934253,1519.110630273819,-226.7992343391553,cda-server-6,24,-247.858779289968,{},264,10.157.146.6,{},-188.91056735653865,0,1200,2025-08-29_15-01-49,11,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756472509,50.0,13200,1519.110630273819,119.27216649055481,11
+14400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 126796.976, 'num_steps_sampled': 14400, 'update_time_ms': 2.254, 'num_steps_trained': 14400, 'load_time_ms': 0.723, 'default': {'kl': 0.018367202952504158, 'cur_lr': 4.999999873689376e-05, 'entropy': 18.43258285522461, 'total_loss': 9163.8193359375, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1256006956100464, 'vf_explained_var': 0.001056631444953382, 'vf_loss': 9163.92578125}, 'grad_time_ms': 689.564}",3934253,1635.195505142212,-227.8824827519844,cda-server-6,24,-247.858779289968,{},288,10.157.146.6,{},-188.91056735653865,0,1200,2025-08-29_15-03-45,12,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 'tanh', 
'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756472625,50.0,14400,1635.195505142212,116.08487486839294,12
+15600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 122994.689, 'num_steps_sampled': 15600, 'update_time_ms': 2.27, 'num_steps_trained': 15600, 'load_time_ms': 0.718, 'default': {'kl': 0.018263446167111397, 'cur_lr': 4.999999873689376e-05, 'entropy': 18.406959533691406, 'total_loss': 8039.34375, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13872545957565308, 'vf_explained_var': 0.009270284324884415, 'vf_loss': 8039.46337890625}, 'grad_time_ms': 711.946}",3934253,1724.5654287338257,-226.95394541182313,cda-server-6,24,-246.89273563832404,{},312,10.157.146.6,{},-188.91056735653865,0,1200,2025-08-29_15-05-15,13,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756472715,50.0,15600,1724.5654287338257,89.36992359161377,13
+16800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 120749.929, 'num_steps_sampled': 16800, 'update_time_ms': 2.298, 'num_steps_trained': 16800, 'load_time_ms': 0.694, 'default': {'kl': 0.01851937174797058, 'cur_lr': 4.999999873689376e-05, 'entropy': 18.40781593322754, 'total_loss': 7785.65380859375, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1454668492078781, 'vf_explained_var': 0.0034669903106987476, 'vf_loss': 7785.7802734375}, 'grad_time_ms': 718.79}",3934253,1833.362226486206,-226.5395327474367,cda-server-6,24,-246.89273563832404,{},336,10.157.146.6,{},-188.91056735653865,0,1200,2025-08-29_15-07-03,14,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756472823,50.0,16800,1833.362226486206,108.79679775238037,14
+18000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 120444.62, 'num_steps_sampled': 18000, 'update_time_ms': 2.292, 'num_steps_trained': 18000, 'load_time_ms': 0.695, 'default': {'kl': 0.018088672310113907, 'cur_lr': 4.999999873689376e-05, 'entropy': 18.373947143554688, 'total_loss': 7424.033203125, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13359440863132477, 'vf_explained_var': -2.5298859327449463e-06, 'vf_loss': 7424.1494140625}, 'grad_time_ms': 707.939}",3934253,1963.7715697288513,-226.28930702200313,cda-server-6,24,-246.89273563832404,{},360,10.157.146.6,{},-192.68354188559,0,1200,2025-08-29_15-09-14,15,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756472954,50.0,18000,1963.7715697288513,130.40934324264526,15
+19200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 115962.542, 'num_steps_sampled': 19200, 'update_time_ms': 2.277, 'num_steps_trained': 19200, 'load_time_ms': 0.705, 'default': {'kl': 0.018010612577199936, 'cur_lr': 4.999999873689376e-05, 'entropy': 18.32969856262207, 'total_loss': 7933.4677734375, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13802139461040497, 'vf_explained_var': -0.001161250751465559, 'vf_loss': 7933.5869140625}, 'grad_time_ms': 717.734}",3934253,2053.281415939331,-225.430023675914,cda-server-6,24,-247.85240578397764,{},384,10.157.146.6,{},-192.68354188559,0,1200,2025-08-29_15-10-43,16,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756473043,50.0,19200,2053.281415939331,89.50984621047974,16
+20400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 115180.178, 'num_steps_sampled': 20400, 'update_time_ms': 2.306, 'num_steps_trained': 20400, 'load_time_ms': 0.674, 'default': {'kl': 0.01733492501080036, 'cur_lr': 4.999999873689376e-05, 'entropy': 18.352266311645508, 'total_loss': 6935.912109375, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13916505873203278, 'vf_explained_var': 0.0398666188120842, 'vf_loss': 6936.03369140625}, 'grad_time_ms': 715.961}",3934253,2173.555982351303,-224.31972516845806,cda-server-6,24,-247.85240578397764,{},408,10.157.146.6,{},-192.56745469224097,0,1200,2025-08-29_15-12-44,17,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756473164,50.0,20400,2173.555982351303,120.27456641197205,17
+21600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 115446.416, 'num_steps_sampled': 21600, 'update_time_ms': 2.353, 'num_steps_trained': 21600, 'load_time_ms': 0.645, 'default': {'kl': 0.018643349409103394, 'cur_lr': 4.999999873689376e-05, 'entropy': 18.32407569885254, 'total_loss': 7109.57861328125, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14262330532073975, 'vf_explained_var': 0.058559127151966095, 'vf_loss': 7109.7021484375}, 'grad_time_ms': 716.764}",3934253,2299.1294887065887,-224.86252533298918,cda-server-6,24,-248.00111036780248,{},432,10.157.146.6,{},-192.56745469224097,0,1200,2025-08-29_15-14-49,18,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756473289,50.0,21600,2299.1294887065887,125.57350635528564,18
+22800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 111634.23, 'num_steps_sampled': 22800, 'update_time_ms': 2.343, 'num_steps_trained': 22800, 'load_time_ms': 0.647, 'default': {'kl': 0.017198346555233, 'cur_lr': 4.999999873689376e-05, 'entropy': 18.320524215698242, 'total_loss': 6918.37060546875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13064756989479065, 'vf_explained_var': 0.07814642041921616, 'vf_loss': 6918.48388671875}, 'grad_time_ms': 711.846}",3934253,2394.942296743393,-225.21955188410809,cda-server-6,24,-248.00111036780248,{},456,10.157.146.6,{},-192.56745469224097,0,1200,2025-08-29_15-16-25,19,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756473385,50.0,22800,2394.942296743393,95.8128080368042,19
+24000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 111642.021, 'num_steps_sampled': 24000, 'update_time_ms': 2.345, 'num_steps_trained': 24000, 'load_time_ms': 0.617, 'default': {'kl': 0.018709510564804077, 'cur_lr': 4.999999873689376e-05, 'entropy': 18.2652645111084, 'total_loss': 6797.3310546875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14481525123119354, 'vf_explained_var': 0.09185083210468292, 'vf_loss': 6797.45703125}, 'grad_time_ms': 720.998}",3934253,2523.54922413826,-224.69537291467503,cda-server-6,24,-248.00111036780248,{},480,10.157.146.6,{},-190.85068285650394,0,1200,2025-08-29_15-18-34,20,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756473514,50.0,24000,2523.54922413826,128.60692739486694,20
+25200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 111823.423, 'num_steps_sampled': 25200, 'update_time_ms': 2.43, 'num_steps_trained': 25200, 'load_time_ms': 0.654, 'default': {'kl': 0.016120517626404762, 'cur_lr': 4.999999873689376e-05, 'entropy': 18.257076263427734, 'total_loss': 6505.86279296875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14086978137493134, 'vf_explained_var': 0.0601482056081295, 'vf_loss': 6505.98779296875}, 'grad_time_ms': 743.136}",3934253,2644.859076499939,-225.42085905668347,cda-server-6,24,-248.00111036780248,{},504,10.157.146.6,{},-190.85068285650394,0,1200,2025-08-29_15-20-35,21,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756473635,50.0,25200,2644.859076499939,121.30985236167908,21
+26400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 108554.114, 'num_steps_sampled': 26400, 'update_time_ms': 2.528, 'num_steps_trained': 26400, 'load_time_ms': 0.653, 'default': {'kl': 0.01815476268529892, 'cur_lr': 4.999999873689376e-05, 'entropy': 18.25238037109375, 'total_loss': 6361.66943359375, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1422284096479416, 'vf_explained_var': 0.0871841087937355, 'vf_loss': 6361.79345703125}, 'grad_time_ms': 733.411}",3934253,2728.1552817821503,-227.12070903133855,cda-server-6,24,-247.372242841637,{},528,10.157.146.6,{},-190.85068285650394,0,1200,2025-08-29_15-21-58,22,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756473718,50.0,26400,2728.1552817821503,83.2962052822113,22
+27600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 113536.284, 'num_steps_sampled': 27600, 'update_time_ms': 2.523, 'num_steps_trained': 27600, 'load_time_ms': 0.646, 'default': {'kl': 0.01872488297522068, 'cur_lr': 4.999999873689376e-05, 'entropy': 18.202598571777344, 'total_loss': 5540.90380859375, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14064835011959076, 'vf_explained_var': 0.09507162123918533, 'vf_loss': 5541.025390625}, 'grad_time_ms': 721.676}",3934253,2867.229010820389,-224.52522155211645,cda-server-6,24,-247.372242841637,{},552,10.157.146.6,{},-188.93533640553093,0,1200,2025-08-29_15-24-17,23,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756473857,50.0,27600,2867.229010820389,139.07372903823853,23
+28800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 111666.508, 'num_steps_sampled': 28800, 'update_time_ms': 2.53, 'num_steps_trained': 28800, 'load_time_ms': 0.645, 'default': {'kl': 0.017991013824939728, 'cur_lr': 4.999999873689376e-05, 'entropy': 18.139867782592773, 'total_loss': 5488.4775390625, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.15115779638290405, 'vf_explained_var': 0.122310571372509, 'vf_loss': 5488.6103515625}, 'grad_time_ms': 706.538}",3934253,2957.178115129471,-222.86015856207715,cda-server-6,24,-248.2345499737599,{},576,10.157.146.6,{},-188.93533640553093,0,1200,2025-08-29_15-25-47,24,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756473947,50.0,28800,2957.178115129471,89.94910430908203,24
+30000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 110789.341, 'num_steps_sampled': 30000, 'update_time_ms': 2.507, 'num_steps_trained': 30000, 'load_time_ms': 0.649, 'default': {'kl': 0.018119478598237038, 'cur_lr': 4.999999873689376e-05, 'entropy': 18.104427337646484, 'total_loss': 5291.33203125, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1454295516014099, 'vf_explained_var': 0.1271432340145111, 'vf_loss': 5291.458984375}, 'grad_time_ms': 689.769}",3934253,3078.6474380493164,-220.54939918657251,cda-server-6,24,-248.2345499737599,{},600,10.157.146.6,{},-185.60920330149142,0,1200,2025-08-29_15-27-49,25,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756474069,50.0,30000,3078.6474380493164,121.46932291984558,25
+31200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 114266.039, 'num_steps_sampled': 31200, 'update_time_ms': 2.532, 'num_steps_trained': 31200, 'load_time_ms': 0.644, 'default': {'kl': 0.018067501485347748, 'cur_lr': 4.999999873689376e-05, 'entropy': 18.099180221557617, 'total_loss': 4856.0693359375, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1323235034942627, 'vf_explained_var': 0.12717147171497345, 'vf_loss': 4856.18310546875}, 'grad_time_ms': 666.322}",3934253,3202.6897122859955,-217.74776505287662,cda-server-6,24,-248.2345499737599,{},624,10.157.146.6,{},-185.60920330149142,0,1200,2025-08-29_15-29-53,26,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756474193,50.0,31200,3202.6897122859955,124.04227423667908,26
+32400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 114420.868, 'num_steps_sampled': 32400, 'update_time_ms': 2.546, 'num_steps_trained': 32400, 'load_time_ms': 0.642, 'default': {'kl': 0.01893593929708004, 'cur_lr': 4.999999873689376e-05, 'entropy': 18.082481384277344, 'total_loss': 4736.587890625, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14160507917404175, 'vf_explained_var': 0.1733734905719757, 'vf_loss': 4736.7099609375}, 'grad_time_ms': 654.305}",3934253,3324.3915185928345,-216.32067322708596,cda-server-6,24,-248.2345499737599,{},648,10.157.146.6,{},-185.60920330149142,0,1200,2025-08-29_15-31-55,27,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756474315,50.0,32400,3324.3915185928345,121.70180630683899,27
+33600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 113001.238, 'num_steps_sampled': 33600, 'update_time_ms': 2.484, 'num_steps_trained': 33600, 'load_time_ms': 0.644, 'default': {'kl': 0.018984422087669373, 'cur_lr': 4.999999873689376e-05, 'entropy': 18.03122329711914, 'total_loss': 4447.91552734375, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.15685193240642548, 'vf_explained_var': 0.14473694562911987, 'vf_loss': 4448.052734375}, 'grad_time_ms': 647.223}",3934253,3435.6978681087494,-213.80917812804458,cda-server-6,24,-248.83030415581862,{},672,10.157.146.6,{},-185.60920330149142,0,1200,2025-08-29_15-33-46,28,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756474426,50.0,33600,3435.6978681087494,111.30634951591492,28
+34800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 116696.335, 'num_steps_sampled': 34800, 'update_time_ms': 2.497, 'num_steps_trained': 34800, 'load_time_ms': 0.646, 'default': {'kl': 0.01779862865805626, 'cur_lr': 4.999999873689376e-05, 'entropy': 18.02900505065918, 'total_loss': 4699.4326171875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13835091888904572, 'vf_explained_var': 0.21267952024936676, 'vf_loss': 4699.552734375}, 'grad_time_ms': 639.428}",3934253,3568.38410115242,-214.57597202649774,cda-server-6,24,-248.83030415581862,{},696,10.157.146.6,{},-191.44960194830855,0,1200,2025-08-29_15-35-59,29,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756474559,50.0,34800,3568.38410115242,132.68623304367065,29
+36000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 117910.592, 'num_steps_sampled': 36000, 'update_time_ms': 2.546, 'num_steps_trained': 36000, 'load_time_ms': 0.647, 'default': {'kl': 0.0186467245221138, 'cur_lr': 4.999999873689376e-05, 'entropy': 18.017282485961914, 'total_loss': 4788.78857421875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14452102780342102, 'vf_explained_var': 0.1922873705625534, 'vf_loss': 4788.9140625}, 'grad_time_ms': 635.195}",3934253,3709.0914623737335,-216.39424869499814,cda-server-6,24,-248.83030415581862,{},720,10.157.146.6,{},-189.41091576437802,0,1200,2025-08-29_15-38-19,30,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756474699,50.0,36000,3709.0914623737335,140.70736122131348,30
+37200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 121207.566, 'num_steps_sampled': 37200, 'update_time_ms': 2.549, 'num_steps_trained': 37200, 'load_time_ms': 0.611, 'default': {'kl': 0.017331527546048164, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.992008209228516, 'total_loss': 4373.1201171875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13839353621006012, 'vf_explained_var': 0.04371914640069008, 'vf_loss': 4373.24072265625}, 'grad_time_ms': 637.987}",3934253,3863.398061275482,-215.19791028193805,cda-server-6,24,-248.83030415581862,{},744,10.157.146.6,{},-186.86999539137864,0,1200,2025-08-29_15-40-54,31,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756474854,50.0,37200,3863.398061275482,154.30659890174866,31
+38400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 124453.716, 'num_steps_sampled': 38400, 'update_time_ms': 2.442, 'num_steps_trained': 38400, 'load_time_ms': 0.613, 'default': {'kl': 0.017102720215916634, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.92547607421875, 'total_loss': 3819.66357421875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13893364369869232, 'vf_explained_var': 0.1954089254140854, 'vf_loss': 3819.78515625}, 'grad_time_ms': 644.967}",3934253,3979.224608182907,-215.13311737404922,cda-server-6,24,-246.02100713653664,{},768,10.157.146.6,{},-186.86999539137864,0,1200,2025-08-29_15-42-50,32,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756474970,50.0,38400,3979.224608182907,115.82654690742493,32
+39600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 123454.297, 'num_steps_sampled': 39600, 'update_time_ms': 2.494, 'num_steps_trained': 39600, 'load_time_ms': 0.612, 'default': {'kl': 0.017682187259197235, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.938262939453125, 'total_loss': 3737.1103515625, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14358346164226532, 'vf_explained_var': 0.1921062171459198, 'vf_loss': 3737.236083984375}, 'grad_time_ms': 659.685}",3934253,4108.452016592026,-213.94500279124793,cda-server-6,24,-246.02100713653664,{},792,10.157.146.6,{},-186.86999539137864,0,1200,2025-08-29_15-44-59,33,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756475099,50.0,39600,4108.452016592026,129.22740840911865,33
+40800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 128326.13, 'num_steps_sampled': 40800, 'update_time_ms': 2.44, 'num_steps_trained': 40800, 'load_time_ms': 0.608, 'default': {'kl': 0.017134059220552444, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.834041595458984, 'total_loss': 3461.917724609375, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12815701961517334, 'vf_explained_var': 0.15495187044143677, 'vf_loss': 3462.0283203125}, 'grad_time_ms': 675.352}",3934253,4247.277045726776,-211.12190019537888,cda-server-6,24,-246.02100713653664,{},816,10.157.146.6,{},-186.86999539137864,0,1200,2025-08-29_15-47-18,34,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756475238,50.0,40800,4247.277045726776,138.82502913475037,34
+42000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 125027.49, 'num_steps_sampled': 42000, 'update_time_ms': 2.422, 'num_steps_trained': 42000, 'load_time_ms': 0.614, 'default': {'kl': 0.018565503880381584, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.86197280883789, 'total_loss': 4044.3408203125, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14776770770549774, 'vf_explained_var': 0.1975460797548294, 'vf_loss': 4044.4697265625}, 'grad_time_ms': 709.139}",3934253,4336.0987548828125,-211.779697417606,cda-server-6,24,-244.79919426101299,{},840,10.157.146.6,{},-185.87812229652314,0,1200,2025-08-29_15-48-46,35,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756475326,50.0,42000,4336.0987548828125,88.82170915603638,35
+43200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 124788.062, 'num_steps_sampled': 43200, 'update_time_ms': 2.438, 'num_steps_trained': 43200, 'load_time_ms': 0.615, 'default': {'kl': 0.017858348786830902, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.815601348876953, 'total_loss': 3304.437255859375, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14674967527389526, 'vf_explained_var': 0.30027350783348083, 'vf_loss': 3304.56591796875}, 'grad_time_ms': 723.817}",3934253,4457.893758058548,-212.04843016952287,cda-server-6,24,-245.58104877489959,{},864,10.157.146.6,{},-185.87812229652314,0,1200,2025-08-29_15-50-48,36,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756475448,50.0,43200,4457.893758058548,121.79500317573547,36
+44400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 124188.457, 'num_steps_sampled': 44400, 'update_time_ms': 2.4, 'num_steps_trained': 44400, 'load_time_ms': 0.611, 'default': {'kl': 0.01844792626798153, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.81826400756836, 'total_loss': 3297.3232421875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1480633020401001, 'vf_explained_var': 0.19995717704296112, 'vf_loss': 3297.452392578125}, 'grad_time_ms': 735.92}",3934253,4573.720880746841,-211.19359964775035,cda-server-6,24,-245.58104877489959,{},888,10.157.146.6,{},-185.87812229652314,0,1200,2025-08-29_15-52-44,37,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756475564,50.0,44400,4573.720880746841,115.82712268829346,37
+45600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 126123.452, 'num_steps_sampled': 45600, 'update_time_ms': 2.4, 'num_steps_trained': 45600, 'load_time_ms': 0.649, 'default': {'kl': 0.018443183973431587, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.755903244018555, 'total_loss': 3353.221435546875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.15454323589801788, 'vf_explained_var': 0.2896014153957367, 'vf_loss': 3353.357177734375}, 'grad_time_ms': 738.131}",3934253,4704.400423049927,-213.00286027217822,cda-server-6,24,-247.5537867115574,{},912,10.157.146.6,{},-185.87812229652314,0,1200,2025-08-29_15-54-55,38,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756475695,50.0,45600,4704.400423049927,130.67954230308533,38
+46800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 121236.767, 'num_steps_sampled': 46800, 'update_time_ms': 2.451, 'num_steps_trained': 46800, 'load_time_ms': 0.646, 'default': {'kl': 0.018317891284823418, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.811492919921875, 'total_loss': 3417.5546875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1502545177936554, 'vf_explained_var': 0.2569473087787628, 'vf_loss': 3417.686279296875}, 'grad_time_ms': 758.619}",3934253,4788.425406217575,-212.3051676911543,cda-server-6,24,-247.5537867115574,{},936,10.157.146.6,{},-181.43706975607378,0,1200,2025-08-29_15-56-19,39,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756475779,50.0,46800,4788.425406217575,84.02498316764832,39
+48000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 119440.389, 'num_steps_sampled': 48000, 'update_time_ms': 2.464, 'num_steps_trained': 48000, 'load_time_ms': 0.648, 'default': {'kl': 0.01919081062078476, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.807842254638672, 'total_loss': 3161.85986328125, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.15894289314746857, 'vf_explained_var': 0.35566556453704834, 'vf_loss': 3161.999267578125}, 'grad_time_ms': 738.052}",3934253,4910.962848186493,-213.84192706556107,cda-server-6,24,-247.5537867115574,{},960,10.157.146.6,{},-181.43706975607378,0,1200,2025-08-29_15-58-21,40,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756475901,50.0,48000,4910.962848186493,122.53744196891785,40
+49200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 115781.532, 'num_steps_sampled': 49200, 'update_time_ms': 2.471, 'num_steps_trained': 49200, 'load_time_ms': 0.65, 'default': {'kl': 0.018592309206724167, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.775829315185547, 'total_loss': 3190.97412109375, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1547202467918396, 'vf_explained_var': 0.2281986027956009, 'vf_loss': 3191.1103515625}, 'grad_time_ms': 717.619}",3934253,5028.476491689682,-213.4334468931419,cda-server-6,24,-247.5537867115574,{},984,10.157.146.6,{},-175.2855057359052,0,1200,2025-08-29_16-00-19,41,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756476019,50.0,49200,5028.476491689682,117.51364350318909,41
+50400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 116700.271, 'num_steps_sampled': 50400, 'update_time_ms': 2.521, 'num_steps_trained': 50400, 'load_time_ms': 0.649, 'default': {'kl': 0.017873523756861687, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.768165588378906, 'total_loss': 2994.9541015625, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1501152515411377, 'vf_explained_var': 0.32763707637786865, 'vf_loss': 2995.0859375}, 'grad_time_ms': 708.999}",3934253,5153.405420064926,-215.7174857830358,cda-server-6,24,-246.37354808212874,{},1008,10.157.146.6,{},-175.2855057359052,0,1200,2025-08-29_16-02-24,42,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756476144,50.0,50400,5153.405420064926,124.92892837524414,42
+51600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 113745.523, 'num_steps_sampled': 51600, 'update_time_ms': 2.494, 'num_steps_trained': 51600, 'load_time_ms': 0.652, 'default': {'kl': 0.018063882365822792, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.706390380859375, 'total_loss': 2835.5146484375, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1458158642053604, 'vf_explained_var': 0.32134178280830383, 'vf_loss': 2835.642333984375}, 'grad_time_ms': 712.86}",3934253,5253.124094724655,-214.39119330004388,cda-server-6,24,-246.37354808212874,{},1032,10.157.146.6,{},-175.05672191815188,0,1200,2025-08-29_16-04-04,43,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756476244,50.0,51600,5253.124094724655,99.718674659729,43
+52800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 112197.151, 'num_steps_sampled': 52800, 'update_time_ms': 2.539, 'num_steps_trained': 52800, 'load_time_ms': 0.693, 'default': {'kl': 0.018111437559127808, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.677021026611328, 'total_loss': 3190.59130859375, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14791905879974365, 'vf_explained_var': 0.15911920368671417, 'vf_loss': 3190.720947265625}, 'grad_time_ms': 710.067}",3934253,5376.435137987137,-213.18503772057986,cda-server-6,24,-246.37354808212874,{},1056,10.157.146.6,{},-175.05672191815188,0,1200,2025-08-29_16-06-07,44,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756476367,50.0,52800,5376.435137987137,123.31104326248169,44
+54000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 116778.42, 'num_steps_sampled': 54000, 'update_time_ms': 2.573, 'num_steps_trained': 54000, 'load_time_ms': 0.695, 'default': {'kl': 0.01823728159070015, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.698951721191406, 'total_loss': 2804.24169921875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.15112829208374023, 'vf_explained_var': 0.2769123613834381, 'vf_loss': 2804.37451171875}, 'grad_time_ms': 704.293}",3934253,5511.011833429337,-213.08326170254938,cda-server-6,24,-245.10308690094269,{},1080,10.157.146.6,{},-175.05672191815188,0,1200,2025-08-29_16-08-22,45,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756476502,50.0,54000,5511.011833429337,134.5766954421997,45
+55200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 118468.282, 'num_steps_sampled': 55200, 'update_time_ms': 2.579, 'num_steps_trained': 55200, 'load_time_ms': 0.722, 'default': {'kl': 0.017772618681192398, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.67725372314453, 'total_loss': 2877.27392578125, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1525329202413559, 'vf_explained_var': 0.30773845314979553, 'vf_loss': 2877.40869140625}, 'grad_time_ms': 706.245}",3934253,5649.724349737167,-211.9924811523262,cda-server-6,24,-244.81933204732172,{},1104,10.157.146.6,{},-175.05672191815188,0,1200,2025-08-29_16-10-40,46,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756476640,50.0,55200,5649.724349737167,138.7125163078308,46
+56400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 119917.311, 'num_steps_sampled': 56400, 'update_time_ms': 2.571, 'num_steps_trained': 56400, 'load_time_ms': 0.729, 'default': {'kl': 0.017335502430796623, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.574216842651367, 'total_loss': 2686.016845703125, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.15624472498893738, 'vf_explained_var': 0.31639328598976135, 'vf_loss': 2686.155517578125}, 'grad_time_ms': 704.712}",3934253,5780.025140762329,-212.06696124329548,cda-server-6,24,-244.81933204732172,{},1128,10.157.146.6,{},-176.47354464694985,0,1200,2025-08-29_16-12-51,47,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756476771,50.0,56400,5780.025140762329,130.30079102516174,47
+57600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 120133.195, 'num_steps_sampled': 57600, 'update_time_ms': 2.578, 'num_steps_trained': 57600, 'load_time_ms': 0.692, 'default': {'kl': 0.018136359751224518, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.61043930053711, 'total_loss': 2544.2529296875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.15641115605831146, 'vf_explained_var': 0.32302284240722656, 'vf_loss': 2544.39111328125}, 'grad_time_ms': 705.707}",3934253,5912.872404336929,-211.27840171927173,cda-server-6,24,-242.75502909465445,{},1152,10.157.146.6,{},-180.22750393736035,0,1200,2025-08-29_16-15-03,48,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756476903,50.0,57600,5912.872404336929,132.84726357460022,48
+58800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 126388.469, 'num_steps_sampled': 58800, 'update_time_ms': 2.5, 'num_steps_trained': 58800, 'load_time_ms': 0.692, 'default': {'kl': 0.017506470903754234, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.62823486328125, 'total_loss': 3480.99951171875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.15134403109550476, 'vf_explained_var': 0.1799653172492981, 'vf_loss': 3481.13330078125}, 'grad_time_ms': 699.242}",3934253,6059.384567737579,-211.70786122380647,cda-server-6,24,-243.36017384063356,{},1176,10.157.146.6,{},-165.89434605077207,0,1200,2025-08-29_16-17-30,49,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756477050,50.0,58800,6059.384567737579,146.51216340065002,49
+60000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 127471.196, 'num_steps_sampled': 60000, 'update_time_ms': 2.503, 'num_steps_trained': 60000, 'load_time_ms': 0.688, 'default': {'kl': 0.0181845985352993, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.62104606628418, 'total_loss': 3795.71875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14132875204086304, 'vf_explained_var': -0.15382134914398193, 'vf_loss': 3795.842041015625}, 'grad_time_ms': 717.066}",3934253,6192.928519487381,-210.11993828827156,cda-server-6,24,-243.36017384063356,{},1200,10.157.146.6,{},-165.89434605077207,0,1200,2025-08-29_16-19-44,50,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756477184,50.0,60000,6192.928519487381,133.54395174980164,50
+61200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 127189.731, 'num_steps_sampled': 61200, 'update_time_ms': 2.526, 'num_steps_trained': 61200, 'load_time_ms': 0.684, 'default': {'kl': 0.018260452896356583, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.566057205200195, 'total_loss': 3529.8896484375, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1244841143488884, 'vf_explained_var': -0.031975701451301575, 'vf_loss': 3529.995361328125}, 'grad_time_ms': 746.291}",3934253,6307.920372962952,-208.46888390923715,cda-server-6,24,-243.36017384063356,{},1224,10.157.146.6,{},-165.89434605077207,0,1200,2025-08-29_16-21-39,51,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756477299,50.0,61200,6307.920372962952,114.99185347557068,51
+62400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 126173.789, 'num_steps_sampled': 62400, 'update_time_ms': 2.55, 'num_steps_trained': 62400, 'load_time_ms': 0.69, 'default': {'kl': 0.018367886543273926, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.582782745361328, 'total_loss': 2911.344482421875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.15388014912605286, 'vf_explained_var': 0.11462072283029556, 'vf_loss': 2911.47998046875}, 'grad_time_ms': 762.265}",3934253,6422.849180936813,-207.44610162930013,cda-server-6,24,-243.36017384063356,{},1248,10.157.146.6,{},-165.89434605077207,0,1200,2025-08-29_16-23-34,52,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756477414,50.0,62400,6422.849180936813,114.9288079738617,52
+63600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 127713.001, 'num_steps_sampled': 63600, 'update_time_ms': 2.559, 'num_steps_trained': 63600, 'load_time_ms': 0.695, 'default': {'kl': 0.0152328722178936, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.559120178222656, 'total_loss': 2936.9033203125, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12449096143245697, 'vf_explained_var': 0.13940726220607758, 'vf_loss': 2937.01220703125}, 'grad_time_ms': 754.196}",3934253,6537.878677845001,-205.3037430007791,cda-server-6,24,-243.33831834713772,{},1272,10.157.146.6,{},-165.89434605077207,0,1200,2025-08-29_16-25-29,53,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756477529,50.0,63600,6537.878677845001,115.02949690818787,53
+64800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 128614.03, 'num_steps_sampled': 64800, 'update_time_ms': 2.534, 'num_steps_trained': 64800, 'load_time_ms': 0.655, 'default': {'kl': 0.015593416057527065, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.57250213623047, 'total_loss': 3269.6923828125, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12995809316635132, 'vf_explained_var': 0.041274651885032654, 'vf_loss': 3269.806640625}, 'grad_time_ms': 745.365}",3934253,6670.112357854843,-205.87594885722905,cda-server-6,24,-243.33831834713772,{},1296,10.157.146.6,{},-171.90663959860424,0,1200,2025-08-29_16-27-41,54,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756477661,50.0,64800,6670.112357854843,132.23368000984192,54
+66000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 129046.204, 'num_steps_sampled': 66000, 'update_time_ms': 2.52, 'num_steps_trained': 66000, 'load_time_ms': 0.639, 'default': {'kl': 0.01628641039133072, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.494834899902344, 'total_loss': 3003.40478515625, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.133028045296669, 'vf_explained_var': -0.06501490622758865, 'vf_loss': 3003.521240234375}, 'grad_time_ms': 721.143}",3934253,6808.767722606659,-204.17025147553716,cda-server-6,24,-239.4554018600887,{},1320,10.157.146.6,{},-174.49680020462705,0,1200,2025-08-29_16-29-59,55,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756477799,50.0,66000,6808.767722606659,138.6553647518158,55
+67200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 124872.43, 'num_steps_sampled': 67200, 'update_time_ms': 2.474, 'num_steps_trained': 67200, 'load_time_ms': 0.617, 'default': {'kl': 0.018313659355044365, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.51993751525879, 'total_loss': 3227.48779296875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14256832003593445, 'vf_explained_var': -0.046293098479509354, 'vf_loss': 3227.612060546875}, 'grad_time_ms': 724.948}",3934253,6905.780424118042,-202.69798806398597,cda-server-6,24,-239.4554018600887,{},1344,10.157.146.6,{},-174.40608955184834,0,1200,2025-08-29_16-31-36,56,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756477896,50.0,67200,6905.780424118042,97.01270151138306,56
+68400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 124572.464, 'num_steps_sampled': 68400, 'update_time_ms': 2.557, 'num_steps_trained': 68400, 'load_time_ms': 0.617, 'default': {'kl': 0.015577811747789383, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.429256439208984, 'total_loss': 2839.56689453125, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1422598659992218, 'vf_explained_var': 0.25796782970428467, 'vf_loss': 2839.693115234375}, 'grad_time_ms': 725.939}",3934253,7033.093000173569,-203.0628973147633,cda-server-6,24,-236.39727673502475,{},1368,10.157.146.6,{},-174.40608955184834,0,1200,2025-08-29_16-33-44,57,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756478024,50.0,68400,7033.093000173569,127.31257605552673,57
+69600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 127368.019, 'num_steps_sampled': 69600, 'update_time_ms': 2.587, 'num_steps_trained': 69600, 'load_time_ms': 0.644, 'default': {'kl': 0.01565130613744259, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.373971939086914, 'total_loss': 3137.237548828125, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1458209902048111, 'vf_explained_var': 0.18359674513339996, 'vf_loss': 3137.36767578125}, 'grad_time_ms': 699.675}",3934253,7193.634396314621,-201.6865593275633,cda-server-6,24,-237.16475391834197,{},1392,10.157.146.6,{},-162.37320864558674,0,1200,2025-08-29_16-36-24,58,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756478184,50.0,69600,7193.634396314621,160.54139614105225,58
+70800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 126269.84, 'num_steps_sampled': 70800, 'update_time_ms': 2.615, 'num_steps_trained': 70800, 'load_time_ms': 0.643, 'default': {'kl': 0.017643585801124573, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.52008819580078, 'total_loss': 2227.716064453125, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14641273021697998, 'vf_explained_var': 0.3743492662906647, 'vf_loss': 2227.8447265625}, 'grad_time_ms': 696.813}",3934253,7329.136283874512,-202.4014445057027,cda-server-6,24,-237.16475391834197,{},1416,10.157.146.6,{},-162.37320864558674,0,1200,2025-08-29_16-38-40,59,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756478320,50.0,70800,7329.136283874512,135.50188755989075,59
+72000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 126855.389, 'num_steps_sampled': 72000, 'update_time_ms': 2.553, 'num_steps_trained': 72000, 'load_time_ms': 0.641, 'default': {'kl': 0.0166630856692791, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.444067001342773, 'total_loss': 2494.462646484375, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1459917575120926, 'vf_explained_var': 0.32033035159111023, 'vf_loss': 2494.591796875}, 'grad_time_ms': 699.478}",3934253,7468.560915708542,-203.28037131250483,cda-server-6,24,-242.41267235711027,{},1440,10.157.146.6,{},-162.37320864558674,0,1200,2025-08-29_16-40-59,60,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756478459,50.0,72000,7468.560915708542,139.42463183403015,60
+73200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 127224.631, 'num_steps_sampled': 73200, 'update_time_ms': 2.522, 'num_steps_trained': 73200, 'load_time_ms': 0.678, 'default': {'kl': 0.0184915941208601, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.35077667236328, 'total_loss': 2228.360595703125, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.15222874283790588, 'vf_explained_var': 0.32440924644470215, 'vf_loss': 2228.493896484375}, 'grad_time_ms': 692.227}",3934253,7587.17391872406,-201.8855045823159,cda-server-6,24,-242.41267235711027,{},1464,10.157.146.6,{},-162.37320864558674,0,1200,2025-08-29_16-42-58,61,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756478578,50.0,73200,7587.17391872406,118.61300301551819,61
+74400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 129470.924, 'num_steps_sampled': 74400, 'update_time_ms': 2.492, 'num_steps_trained': 74400, 'load_time_ms': 0.668, 'default': {'kl': 0.01812606118619442, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.345386505126953, 'total_loss': 1536.971435546875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14230865240097046, 'vf_explained_var': 0.4932720363140106, 'vf_loss': 1537.095458984375}, 'grad_time_ms': 690.446}",3934253,7724.547788619995,-203.61260778759706,cda-server-6,24,-242.41267235711027,{},1488,10.157.146.6,{},-167.70166226128026,0,1200,2025-08-29_16-45-15,62,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756478715,50.0,74400,7724.547788619995,137.37386989593506,62
+75600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 130895.803, 'num_steps_sampled': 75600, 'update_time_ms': 2.494, 'num_steps_trained': 75600, 'load_time_ms': 0.666, 'default': {'kl': 0.018034812062978745, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.338903427124023, 'total_loss': 1522.239990234375, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.15658150613307953, 'vf_explained_var': 0.5048775672912598, 'vf_loss': 1522.37841796875}, 'grad_time_ms': 689.206}",3934253,7853.81393122673,-204.79683966833977,cda-server-6,24,-243.82422338554372,{},1512,10.157.146.6,{},-178.07009410244865,0,1200,2025-08-29_16-47-25,63,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756478845,50.0,75600,7853.81393122673,129.26614260673523,63
+76800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 128562.827, 'num_steps_sampled': 76800, 'update_time_ms': 2.455, 'num_steps_trained': 76800, 'load_time_ms': 0.668, 'default': {'kl': 0.01748146489262581, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.300491333007812, 'total_loss': 1020.3151245117188, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1468556821346283, 'vf_explained_var': 0.628902018070221, 'vf_loss': 1020.4442138671875}, 'grad_time_ms': 685.694}",3934253,7962.6811876297,-204.60964781539147,cda-server-6,24,-243.82422338554372,{},1536,10.157.146.6,{},-176.72126537076102,0,1200,2025-08-29_16-49-14,64,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756478954,50.0,76800,7962.6811876297,108.86725640296936,64
+78000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 130065.401, 'num_steps_sampled': 78000, 'update_time_ms': 2.495, 'num_steps_trained': 78000, 'load_time_ms': 0.671, 'default': {'kl': 0.01788967289030552, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.299461364746094, 'total_loss': 1186.3619384765625, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14163149893283844, 'vf_explained_var': 0.5919825434684753, 'vf_loss': 1186.4854736328125}, 'grad_time_ms': 706.33}",3934253,8116.570593595505,-206.1822075156246,cda-server-6,24,-243.82422338554372,{},1560,10.157.146.6,{},-176.72126537076102,0,1200,2025-08-29_16-51-47,65,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756479107,50.0,78000,8116.570593595505,153.88940596580505,65
+79200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 133131.583, 'num_steps_sampled': 79200, 'update_time_ms': 2.57, 'num_steps_trained': 79200, 'load_time_ms': 0.685, 'default': {'kl': 0.017699653282761574, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.267019271850586, 'total_loss': 845.4718627929688, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1545608788728714, 'vf_explained_var': 0.7317812442779541, 'vf_loss': 845.6085815429688}, 'grad_time_ms': 682.49}",3934253,8244.00701546669,-204.67036275163156,cda-server-6,24,-243.82422338554372,{},1584,10.157.146.6,{},-176.72126537076102,0,1200,2025-08-29_16-53-55,66,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756479235,50.0,79200,8244.00701546669,127.4364218711853,66
+80400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 134628.525, 'num_steps_sampled': 80400, 'update_time_ms': 2.499, 'num_steps_trained': 80400, 'load_time_ms': 0.681, 'default': {'kl': 0.018020590767264366, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.243600845336914, 'total_loss': 908.90869140625, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.15663856267929077, 'vf_explained_var': 0.6649714708328247, 'vf_loss': 909.047119140625}, 'grad_time_ms': 684.762}",3934253,8386.31137752533,-203.45399373806507,cda-server-6,24,-236.6240604926094,{},1608,10.157.146.6,{},-176.72126537076102,0,1200,2025-08-29_16-56-17,67,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756479377,50.0,80400,8386.31137752533,142.30436205863953,67
+81600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 127800.101, 'num_steps_sampled': 81600, 'update_time_ms': 2.453, 'num_steps_trained': 81600, 'load_time_ms': 0.659, 'default': {'kl': 0.01779426634311676, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.173952102661133, 'total_loss': 923.4046020507812, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14696913957595825, 'vf_explained_var': 0.6245005130767822, 'vf_loss': 923.5335083007812}, 'grad_time_ms': 714.332}",3934253,8478.86295580864,-201.6099014965169,cda-server-6,24,-236.6240604926094,{},1632,10.157.146.6,{},-170.85541536790782,0,1200,2025-08-29_16-57-50,68,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756479470,50.0,81600,8478.86295580864,92.55157828330994,68
+82800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 127706.637, 'num_steps_sampled': 82800, 'update_time_ms': 2.508, 'num_steps_trained': 82800, 'load_time_ms': 0.664, 'default': {'kl': 0.017506642267107964, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.228662490844727, 'total_loss': 955.548828125, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13391107320785522, 'vf_explained_var': 0.6843433976173401, 'vf_loss': 955.6649169921875}, 'grad_time_ms': 717.92}",3934253,8613.466737508774,-199.76279681389474,cda-server-6,24,-237.87893357886605,{},1656,10.157.146.6,{},-169.67883789220647,0,1200,2025-08-29_17-00-04,69,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756479604,50.0,82800,8613.466737508774,134.60378170013428,69
+84000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 128395.735, 'num_steps_sampled': 84000, 'update_time_ms': 2.614, 'num_steps_trained': 84000, 'load_time_ms': 0.666, 'default': {'kl': 0.01776537112891674, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.27729034423828, 'total_loss': 858.9427490234375, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14407379925251007, 'vf_explained_var': 0.6505129337310791, 'vf_loss': 859.0687255859375}, 'grad_time_ms': 718.019}",3934253,8759.78401517868,-200.28976271340775,cda-server-6,24,-237.87893357886605,{},1680,10.157.146.6,{},-166.50156901737446,0,1200,2025-08-29_17-02-31,70,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756479751,50.0,84000,8759.78401517868,146.31727766990662,70
+85200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 127439.452, 'num_steps_sampled': 85200, 'update_time_ms': 2.607, 'num_steps_trained': 85200, 'load_time_ms': 0.634, 'default': {'kl': 0.017438506707549095, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.173513412475586, 'total_loss': 497.2986145019531, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14505235850811005, 'vf_explained_var': 0.8089240193367004, 'vf_loss': 497.4259948730469}, 'grad_time_ms': 715.196}",3934253,8868.804517507553,-196.78183297555998,cda-server-6,24,-237.87893357886605,{},1704,10.157.146.6,{},-162.7454707928377,0,1200,2025-08-29_17-04-20,71,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756479860,50.0,85200,8868.804517507553,109.02050232887268,71
+86400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 126949.793, 'num_steps_sampled': 86400, 'update_time_ms': 2.563, 'num_steps_trained': 86400, 'load_time_ms': 0.632, 'default': {'kl': 0.017478276044130325, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.1412296295166, 'total_loss': 537.347412109375, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1449865847826004, 'vf_explained_var': 0.8066643476486206, 'vf_loss': 537.4746704101562}, 'grad_time_ms': 716.498}",3934253,9001.29467010498,-196.7515997280192,cda-server-6,24,-237.87893357886605,{},1728,10.157.146.6,{},-162.7454707928377,0,1200,2025-08-29_17-06-32,72,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756479992,50.0,86400,9001.29467010498,132.49015259742737,72
+87600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 126478.744, 'num_steps_sampled': 87600, 'update_time_ms': 2.579, 'num_steps_trained': 87600, 'load_time_ms': 0.628, 'default': {'kl': 0.016992026939988136, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.1573486328125, 'total_loss': 668.7613525390625, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.15834328532218933, 'vf_explained_var': 0.7176796793937683, 'vf_loss': 668.9024658203125}, 'grad_time_ms': 720.878}",3934253,9125.894088745117,-196.29779407045845,cda-server-6,24,-235.77930229587113,{},1752,10.157.146.6,{},-160.0539174982735,0,1200,2025-08-29_17-08-37,73,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756480117,50.0,87600,9125.894088745117,124.59941864013672,73
+88800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 127925.017, 'num_steps_sampled': 88800, 'update_time_ms': 2.58, 'num_steps_trained': 88800, 'load_time_ms': 0.638, 'default': {'kl': 0.01787766069173813, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.137168884277344, 'total_loss': 539.1329956054688, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13451358675956726, 'vf_explained_var': 0.7756462097167969, 'vf_loss': 539.2493286132812}, 'grad_time_ms': 745.174}",3934253,9249.467748641968,-196.06748029454903,cda-server-6,24,-241.6755977787709,{},1776,10.157.146.6,{},-160.0539174982735,0,1200,2025-08-29_17-10-41,74,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756480241,50.0,88800,9249.467748641968,123.57365989685059,74
+90000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 124662.659, 'num_steps_sampled': 90000, 'update_time_ms': 2.552, 'num_steps_trained': 90000, 'load_time_ms': 0.646, 'default': {'kl': 0.016736924648284912, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.0623836517334, 'total_loss': 505.48822021484375, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.141299769282341, 'vf_explained_var': 0.784087598323822, 'vf_loss': 505.6125793457031}, 'grad_time_ms': 746.333}",3934253,9370.744490146637,-195.75643804258007,cda-server-6,24,-241.6755977787709,{},1800,10.157.146.6,{},-160.0539174982735,0,1200,2025-08-29_17-12-42,75,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756480362,50.0,90000,9370.744490146637,121.27674150466919,75
+91200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 121177.374, 'num_steps_sampled': 91200, 'update_time_ms': 2.489, 'num_steps_trained': 91200, 'load_time_ms': 0.624, 'default': {'kl': 0.018218128010630608, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.026023864746094, 'total_loss': 539.190673828125, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14476412534713745, 'vf_explained_var': 0.7949026823043823, 'vf_loss': 539.31689453125}, 'grad_time_ms': 763.486}",3934253,9463.499910831451,-197.00243101656838,cda-server-6,24,-241.6755977787709,{},1824,10.157.146.6,{},-160.0539174982735,0,1200,2025-08-29_17-14-15,76,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756480455,50.0,91200,9463.499910831451,92.75542068481445,76
+92400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 121798.333, 'num_steps_sampled': 92400, 'update_time_ms': 2.498, 'num_steps_trained': 92400, 'load_time_ms': 0.62, 'default': {'kl': 0.017787037417292595, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.07424545288086, 'total_loss': 433.3902587890625, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1485578417778015, 'vf_explained_var': 0.8211551308631897, 'vf_loss': 433.52081298828125}, 'grad_time_ms': 761.874}",3934253,9611.997594594955,-196.73452598520976,cda-server-6,24,-241.6755977787709,{},1848,10.157.146.6,{},-164.4817344017371,0,1200,2025-08-29_17-16-43,77,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756480603,50.0,92400,9611.997594594955,148.49768376350403,77
+93600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 128079.512, 'num_steps_sampled': 93600, 'update_time_ms': 2.497, 'num_steps_trained': 93600, 'load_time_ms': 0.645, 'default': {'kl': 0.01852409727871418, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.058555603027344, 'total_loss': 397.9156799316406, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.16270986199378967, 'vf_explained_var': 0.8211359977722168, 'vf_loss': 398.0596618652344}, 'grad_time_ms': 752.817}",3934253,9767.270104885101,-195.6968907137477,cda-server-6,24,-235.19605511971818,{},1872,10.157.146.6,{},-164.4817344017371,0,1200,2025-08-29_17-19-18,78,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756480758,50.0,93600,9767.270104885101,155.27251029014587,78
+94800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 124008.762, 'num_steps_sampled': 94800, 'update_time_ms': 2.44, 'num_steps_trained': 94800, 'load_time_ms': 0.644, 'default': {'kl': 0.01609645038843155, 'cur_lr': 4.999999873689376e-05, 'entropy': 17.04368019104004, 'total_loss': 314.5567626953125, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.15425236523151398, 'vf_explained_var': 0.8700137138366699, 'vf_loss': 314.6947021484375}, 'grad_time_ms': 759.731}",3934253,9861.234502792358,-196.28833283553197,cda-server-6,24,-235.19605511971818,{},1896,10.157.146.6,{},-164.4817344017371,0,1200,2025-08-29_17-20-52,79,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756480852,50.0,94800,9861.234502792358,93.96439790725708,79
+96000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 119963.043, 'num_steps_sampled': 96000, 'update_time_ms': 2.322, 'num_steps_trained': 96000, 'load_time_ms': 0.648, 'default': {'kl': 0.017412256449460983, 'cur_lr': 4.999999873689376e-05, 'entropy': 16.991172790527344, 'total_loss': 300.1842956542969, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.15490752458572388, 'vf_explained_var': 0.8661372661590576, 'vf_loss': 300.32159423828125}, 'grad_time_ms': 746.461}",3934253,9966.960909605026,-195.29564945059207,cda-server-6,24,-235.19605511971818,{},1920,10.157.146.6,{},-159.59112747436288,0,1200,2025-08-29_17-22-38,80,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756480958,50.0,96000,9966.960909605026,105.72640681266785,80
+97200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 123085.136, 'num_steps_sampled': 97200, 'update_time_ms': 2.358, 'num_steps_trained': 97200, 'load_time_ms': 0.65, 'default': {'kl': 0.018388399854302406, 'cur_lr': 4.999999873689376e-05, 'entropy': 16.957988739013672, 'total_loss': 435.7005310058594, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.15358111262321472, 'vf_explained_var': 0.8013516068458557, 'vf_loss': 435.8354797363281}, 'grad_time_ms': 738.744}",3934253,10107.12469124794,-195.21138806142923,cda-server-6,24,-238.3792524057925,{},1944,10.157.146.6,{},-159.59112747436288,0,1200,2025-08-29_17-24-58,81,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756481098,50.0,97200,10107.12469124794,140.16378164291382,81
+98400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 122775.768, 'num_steps_sampled': 98400, 'update_time_ms': 2.377, 'num_steps_trained': 98400, 'load_time_ms': 0.669, 'default': {'kl': 0.01676376722753048, 'cur_lr': 4.999999873689376e-05, 'entropy': 16.881912231445312, 'total_loss': 455.4905700683594, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.15382982790470123, 'vf_explained_var': 0.7882832884788513, 'vf_loss': 455.62738037109375}, 'grad_time_ms': 733.703}",3934253,10236.471656560898,-193.51017683169036,cda-server-6,24,-238.3792524057925,{},1968,10.157.146.6,{},-159.59112747436288,0,1200,2025-08-29_17-27-08,82,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756481228,50.0,98400,10236.471656560898,129.34696531295776,82
+99600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 122992.217, 'num_steps_sampled': 99600, 'update_time_ms': 2.418, 'num_steps_trained': 99600, 'load_time_ms': 0.667, 'default': {'kl': 0.016289807856082916, 'cur_lr': 4.999999873689376e-05, 'entropy': 16.883893966674805, 'total_loss': 278.7664794921875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14257968962192535, 'vf_explained_var': 0.8755154013633728, 'vf_loss': 278.8926086425781}, 'grad_time_ms': 718.016}",3934253,10363.079635858536,-193.8101555905133,cda-server-6,24,-238.3792524057925,{},1992,10.157.146.6,{},-159.59112747436288,0,1200,2025-08-29_17-29-14,83,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756481354,50.0,99600,10363.079635858536,126.60797929763794,83
+100800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 121729.185, 'num_steps_sampled': 100800, 'update_time_ms': 2.458, 'num_steps_trained': 100800, 'load_time_ms': 0.657, 'default': {'kl': 0.01808132603764534, 'cur_lr': 4.999999873689376e-05, 'entropy': 16.88953399658203, 'total_loss': 364.16656494140625, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1611376404762268, 'vf_explained_var': 0.8535504341125488, 'vf_loss': 364.30938720703125}, 'grad_time_ms': 689.813}",3934253,10473.741010189056,-194.8083754000186,cda-server-6,24,-244.0834730499058,{},2016,10.157.146.6,{},-158.90891938732824,0,1200,2025-08-29_17-31-05,84,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756481465,50.0,100800,10473.741010189056,110.66137433052063,84
+102000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 121568.223, 'num_steps_sampled': 102000, 'update_time_ms': 2.407, 'num_steps_trained': 102000, 'load_time_ms': 0.648, 'default': {'kl': 0.016419248655438423, 'cur_lr': 4.999999873689376e-05, 'entropy': 16.816686630249023, 'total_loss': 237.03546142578125, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14151573181152344, 'vf_explained_var': 0.8933451175689697, 'vf_loss': 237.16033935546875}, 'grad_time_ms': 673.73}",3934253,10593.246505260468,-192.0402432573778,cda-server-6,24,-244.0834730499058,{},2040,10.157.146.6,{},-158.90891938732824,0,1200,2025-08-29_17-33-04,85,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756481584,50.0,102000,10593.246505260468,119.50549507141113,85
+103200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 126660.424, 'num_steps_sampled': 103200, 'update_time_ms': 2.477, 'num_steps_trained': 103200, 'load_time_ms': 0.651, 'default': {'kl': 0.017432495951652527, 'cur_lr': 4.999999873689376e-05, 'entropy': 16.767534255981445, 'total_loss': 315.4970397949219, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14589375257492065, 'vf_explained_var': 0.8593595027923584, 'vf_loss': 315.6252746582031}, 'grad_time_ms': 668.927}",3934253,10736.875820159912,-192.24686534121082,cda-server-6,24,-244.0834730499058,{},2064,10.157.146.6,{},-156.46359577259705,0,1200,2025-08-29_17-35-28,86,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756481728,50.0,103200,10736.875820159912,143.62931489944458,86
+104400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 125743.944, 'num_steps_sampled': 104400, 'update_time_ms': 2.492, 'num_steps_trained': 104400, 'load_time_ms': 0.656, 'default': {'kl': 0.017669349908828735, 'cur_lr': 4.999999873689376e-05, 'entropy': 16.812036514282227, 'total_loss': 267.5138854980469, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.15393000841140747, 'vf_explained_var': 0.8753257989883423, 'vf_loss': 267.64990234375}, 'grad_time_ms': 668.674}",3934253,10876.206056833267,-191.51076350055698,cda-server-6,24,-244.0834730499058,{},2088,10.157.146.6,{},-156.46359577259705,0,1200,2025-08-29_17-37-47,87,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756481867,50.0,104400,10876.206056833267,139.3302366733551,87
+105600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 120972.7, 'num_steps_sampled': 105600, 'update_time_ms': 2.506, 'num_steps_trained': 105600, 'load_time_ms': 0.623, 'default': {'kl': 0.01689998432993889, 'cur_lr': 4.999999873689376e-05, 'entropy': 16.69136619567871, 'total_loss': 439.05120849609375, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1567053645849228, 'vf_explained_var': 0.805030107498169, 'vf_loss': 439.1907958984375}, 'grad_time_ms': 681.062}",3934253,10983.889906644821,-189.84582066774183,cda-server-6,24,-240.50411683754677,{},2112,10.157.146.6,{},-152.51348529183588,0,1200,2025-08-29_17-39-35,88,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756481975,50.0,105600,10983.889906644821,107.68384981155396,88
+106800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 123934.882, 'num_steps_sampled': 106800, 'update_time_ms': 2.49, 'num_steps_trained': 106800, 'load_time_ms': 0.626, 'default': {'kl': 0.01723390817642212, 'cur_lr': 4.999999873689376e-05, 'entropy': 16.839763641357422, 'total_loss': 313.3089294433594, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14374259114265442, 'vf_explained_var': 0.8504605889320374, 'vf_loss': 313.4351806640625}, 'grad_time_ms': 669.345}",3934253,11107.359429359436,-190.61900295321735,cda-server-6,24,-240.50411683754677,{},2136,10.157.146.6,{},-152.51348529183588,0,1200,2025-08-29_17-41-39,89,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756482099,50.0,106800,11107.359429359436,123.46952271461487,89
+108000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 126152.382, 'num_steps_sampled': 108000, 'update_time_ms': 2.516, 'num_steps_trained': 108000, 'load_time_ms': 0.623, 'default': {'kl': 0.01680140011012554, 'cur_lr': 4.999999873689376e-05, 'entropy': 16.745079040527344, 'total_loss': 342.0080871582031, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.15599854290485382, 'vf_explained_var': 0.8507482409477234, 'vf_loss': 342.14703369140625}, 'grad_time_ms': 659.328}",3934253,11235.161835432053,-190.20998737125626,cda-server-6,24,-240.50411683754677,{},2160,10.157.146.6,{},-152.51348529183588,0,1200,2025-08-29_17-43-46,90,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756482226,50.0,108000,11235.161835432053,127.80240607261658,90
+109200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 124292.415, 'num_steps_sampled': 109200, 'update_time_ms': 2.439, 'num_steps_trained': 109200, 'load_time_ms': 0.617, 'default': {'kl': 0.015435642562806606, 'cur_lr': 4.999999873689376e-05, 'entropy': 16.718061447143555, 'total_loss': 403.6151123046875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.129756897687912, 'vf_explained_var': 0.8099173903465271, 'vf_loss': 403.729248046875}, 'grad_time_ms': 640.826}",3934253,11356.541090488434,-189.15599179625715,cda-server-6,24,-240.14707734147564,{},2184,10.157.146.6,{},-152.51348529183588,0,1200,2025-08-29_17-45-48,91,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756482348,50.0,109200,11356.541090488434,121.37925505638123,91
+110400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 121373.638, 'num_steps_sampled': 110400, 'update_time_ms': 2.509, 'num_steps_trained': 110400, 'load_time_ms': 0.598, 'default': {'kl': 0.015910038724541664, 'cur_lr': 4.999999873689376e-05, 'entropy': 16.68692970275879, 'total_loss': 359.96844482421875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.15589465200901031, 'vf_explained_var': 0.8525227904319763, 'vf_loss': 360.1082458496094}, 'grad_time_ms': 650.758}",3934253,11456.798621892929,-185.9974027787964,cda-server-6,24,-240.14707734147564,{},2208,10.157.146.6,{},-152.51348529183588,0,1200,2025-08-29_17-47-28,92,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756482448,50.0,110400,11456.798621892929,100.25753140449524,92
+111600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 120579.803, 'num_steps_sampled': 111600, 'update_time_ms': 2.452, 'num_steps_trained': 111600, 'load_time_ms': 0.598, 'default': {'kl': 0.016870131716132164, 'cur_lr': 4.999999873689376e-05, 'entropy': 16.627105712890625, 'total_loss': 202.50332641601562, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13515739142894745, 'vf_explained_var': 0.9027056097984314, 'vf_loss': 202.62139892578125}, 'grad_time_ms': 664.662}",3934253,11575.607246160507,-184.28075541258278,cda-server-6,24,-240.14707734147564,{},2232,10.157.146.6,{},-156.2375228182839,0,1200,2025-08-29_17-49-27,93,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756482567,50.0,111600,11575.607246160507,118.80862426757812,93
+112800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 118513.786, 'num_steps_sampled': 112800, 'update_time_ms': 2.458, 'num_steps_trained': 112800, 'load_time_ms': 0.604, 'default': {'kl': 0.01635323092341423, 'cur_lr': 4.999999873689376e-05, 'entropy': 16.571773529052734, 'total_loss': 202.59608459472656, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13777993619441986, 'vf_explained_var': 0.8907999396324158, 'vf_loss': 202.71730041503906}, 'grad_time_ms': 689.211}",3934253,11665.854831933975,-183.06815936431977,cda-server-6,24,-230.12884374648553,{},2256,10.157.146.6,{},-155.35197419791174,0,1200,2025-08-29_17-50-57,94,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756482657,50.0,112800,11665.854831933975,90.24758577346802,94
+114000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 118112.503, 'num_steps_sampled': 114000, 'update_time_ms': 2.541, 'num_steps_trained': 114000, 'load_time_ms': 0.638, 'default': {'kl': 0.01679901033639908, 'cur_lr': 4.999999873689376e-05, 'entropy': 16.693180084228516, 'total_loss': 392.57073974609375, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.15203702449798584, 'vf_explained_var': 0.8344202637672424, 'vf_loss': 392.70574951171875}, 'grad_time_ms': 689.018}",3934253,11781.346488714218,-184.8951815855976,cda-server-6,24,-236.2370975894316,{},2280,10.157.146.6,{},-155.35197419791174,0,1200,2025-08-29_17-52-53,95,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756482773,50.0,114000,11781.346488714218,115.49165678024292,95
+115200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 113381.279, 'num_steps_sampled': 115200, 'update_time_ms': 2.48, 'num_steps_trained': 115200, 'load_time_ms': 0.636, 'default': {'kl': 0.017182350158691406, 'cur_lr': 4.999999873689376e-05, 'entropy': 16.59419822692871, 'total_loss': 327.04345703125, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14678317308425903, 'vf_explained_var': 0.8305256366729736, 'vf_loss': 327.1728515625}, 'grad_time_ms': 690.153}",3934253,11877.674539804459,-184.5234958852344,cda-server-6,24,-236.2370975894316,{},2304,10.157.146.6,{},-153.70410475921176,0,1200,2025-08-29_17-54-29,96,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756482869,50.0,115200,11877.674539804459,96.32805109024048,96
+116400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 113362.315, 'num_steps_sampled': 116400, 'update_time_ms': 2.472, 'num_steps_trained': 116400, 'load_time_ms': 0.636, 'default': {'kl': 0.017371561378240585, 'cur_lr': 4.999999873689376e-05, 'entropy': 16.490705490112305, 'total_loss': 211.58644104003906, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14773549139499664, 'vf_explained_var': 0.8857764005661011, 'vf_loss': 211.7165985107422}, 'grad_time_ms': 678.808}",3934253,12016.701777458191,-182.23667207649603,cda-server-6,24,-236.2370975894316,{},2328,10.157.146.6,{},-153.70410475921176,0,1200,2025-08-29_17-56-48,97,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756483008,50.0,116400,12016.701777458191,139.0272376537323,97
+117600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 113857.263, 'num_steps_sampled': 117600, 'update_time_ms': 2.434, 'num_steps_trained': 117600, 'load_time_ms': 0.639, 'default': {'kl': 0.015952367335557938, 'cur_lr': 4.999999873689376e-05, 'entropy': 16.48573112487793, 'total_loss': 260.2500915527344, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12603165209293365, 'vf_explained_var': 0.8630385994911194, 'vf_loss': 260.3599853515625}, 'grad_time_ms': 671.948}",3934253,12129.268003940582,-181.74283275609204,cda-server-6,24,-236.2370975894316,{},2352,10.157.146.6,{},-153.70410475921176,0,1200,2025-08-29_17-58-41,98,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756483121,50.0,117600,12129.268003940582,112.56622648239136,98
+118800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 111223.736, 'num_steps_sampled': 118800, 'update_time_ms': 2.448, 'num_steps_trained': 118800, 'load_time_ms': 0.636, 'default': {'kl': 0.01815967448055744, 'cur_lr': 4.999999873689376e-05, 'entropy': 16.53923225402832, 'total_loss': 143.78089904785156, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.15580715239048004, 'vf_explained_var': 0.9172838926315308, 'vf_loss': 143.91831970214844}, 'grad_time_ms': 669.875}",3934253,12226.381784915924,-177.3244781328566,cda-server-6,24,-223.9890509880485,{},2376,10.157.146.6,{},-153.70410475921176,0,1200,2025-08-29_18-00-18,99,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756483218,50.0,118800,12226.381784915924,97.1137809753418,99
+120000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 110873.774, 'num_steps_sampled': 120000, 'update_time_ms': 2.437, 'num_steps_trained': 120000, 'load_time_ms': 0.633, 'default': {'kl': 0.016468364745378494, 'cur_lr': 4.999999873689376e-05, 'entropy': 16.499685287475586, 'total_loss': 194.18292236328125, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14151686429977417, 'vf_explained_var': 0.9113339185714722, 'vf_loss': 194.30775451660156}, 'grad_time_ms': 686.587}",3934253,12350.85043144226,-177.8404594305838,cda-server-6,24,-223.9890509880485,{},2400,10.157.146.6,{},-153.55256333374888,0,1200,2025-08-29_18-02-22,100,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756483342,50.0,120000,12350.85043144226,124.46864652633667,100
+121200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 111372.389, 'num_steps_sampled': 121200, 'update_time_ms': 2.463, 'num_steps_trained': 121200, 'load_time_ms': 0.638, 'default': {'kl': 0.015280604362487793, 'cur_lr': 4.999999873689376e-05, 'entropy': 16.443017959594727, 'total_loss': 351.51165771484375, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14789825677871704, 'vf_explained_var': 0.8464590311050415, 'vf_loss': 351.64410400390625}, 'grad_time_ms': 706.421}",3934253,12477.413677215576,-177.50363631361705,cda-server-6,24,-237.2865543757983,{},2424,10.157.146.6,{},-153.55256333374888,0,1200,2025-08-29_18-04-29,101,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756483469,50.0,121200,12477.413677215576,126.56324577331543,101
+122400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 116905.836, 'num_steps_sampled': 122400, 'update_time_ms': 2.469, 'num_steps_trained': 122400, 'load_time_ms': 0.641, 'default': {'kl': 0.017815299332141876, 'cur_lr': 4.999999873689376e-05, 'entropy': 16.31475067138672, 'total_loss': 196.4730682373047, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1293335109949112, 'vf_explained_var': 0.8965740203857422, 'vf_loss': 196.5843505859375}, 'grad_time_ms': 698.142}",3934253,12632.923156023026,-176.86709660749798,cda-server-6,24,-237.2865543757983,{},2448,10.157.146.6,{},-152.76490594743353,0,1200,2025-08-29_18-07-04,102,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756483624,50.0,122400,12632.923156023026,155.50947880744934,102
+123600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 113790.619, 'num_steps_sampled': 123600, 'update_time_ms': 2.447, 'num_steps_trained': 123600, 'load_time_ms': 0.645, 'default': {'kl': 0.01563744992017746, 'cur_lr': 4.999999873689376e-05, 'entropy': 16.338083267211914, 'total_loss': 151.3992919921875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14109545946121216, 'vf_explained_var': 0.9077298045158386, 'vf_loss': 151.52456665039062}, 'grad_time_ms': 691.221}",3934253,12720.509969711304,-175.99193290191877,cda-server-6,24,-237.2865543757983,{},2472,10.157.146.6,{},-147.00338003430244,0,1200,2025-08-29_18-08-32,103,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756483712,50.0,123600,12720.509969711304,87.5868136882782,103
+124800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 114789.433, 'num_steps_sampled': 124800, 'update_time_ms': 2.446, 'num_steps_trained': 124800, 'load_time_ms': 0.643, 'default': {'kl': 0.016803696751594543, 'cur_lr': 4.999999873689376e-05, 'entropy': 16.251419067382812, 'total_loss': 151.03599548339844, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13679622113704681, 'vf_explained_var': 0.8967797756195068, 'vf_loss': 151.15579223632812}, 'grad_time_ms': 680.611}",3934253,12820.63918542862,-172.28727233323306,cda-server-6,24,-237.2865543757983,{},2496,10.157.146.6,{},-147.00338003430244,0,1200,2025-08-29_18-10-12,104,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756483812,50.0,124800,12820.63918542862,100.12921571731567,104
+126000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 116485.609, 'num_steps_sampled': 126000, 'update_time_ms': 2.39, 'num_steps_trained': 126000, 'load_time_ms': 0.609, 'default': {'kl': 0.016549859195947647, 'cur_lr': 4.999999873689376e-05, 'entropy': 16.36214256286621, 'total_loss': 301.8638916015625, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1474459022283554, 'vf_explained_var': 0.866607666015625, 'vf_loss': 301.99456787109375}, 'grad_time_ms': 696.338}",3934253,12953.248711824417,-173.19153721824375,cda-server-6,24,-228.24084146483688,{},2520,10.157.146.6,{},-147.00338003430244,0,1200,2025-08-29_18-12-25,105,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756483945,50.0,126000,12953.248711824417,132.60952639579773,105
+127200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 116791.533, 'num_steps_sampled': 127200, 'update_time_ms': 2.492, 'num_steps_trained': 127200, 'load_time_ms': 0.608, 'default': {'kl': 0.01571556180715561, 'cur_lr': 4.999999873689376e-05, 'entropy': 16.307790756225586, 'total_loss': 150.79681396484375, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1507481187582016, 'vf_explained_var': 0.9222152233123779, 'vf_loss': 150.93165588378906}, 'grad_time_ms': 712.378}",3934253,13052.797505378723,-172.00315892886397,cda-server-6,24,-228.24084146483688,{},2544,10.157.146.6,{},-147.00338003430244,0,1200,2025-08-29_18-14-04,106,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756484044,50.0,127200,13052.797505378723,99.54879355430603,106
+128400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 114973.748, 'num_steps_sampled': 128400, 'update_time_ms': 2.506, 'num_steps_trained': 128400, 'load_time_ms': 0.604, 'default': {'kl': 0.016194190829992294, 'cur_lr': 4.999999873689376e-05, 'entropy': 16.16952896118164, 'total_loss': 188.38453674316406, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12826089560985565, 'vf_explained_var': 0.8953073024749756, 'vf_loss': 188.4963836669922}, 'grad_time_ms': 717.54}",3934253,13173.698773622513,-171.93660035227012,cda-server-6,24,-228.24084146483688,{},2568,10.157.146.6,{},-154.05940271714744,0,1200,2025-08-29_18-16-05,107,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756484165,50.0,128400,13173.698773622513,120.90126824378967,107
+129600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 113312.925, 'num_steps_sampled': 129600, 'update_time_ms': 2.55, 'num_steps_trained': 129600, 'load_time_ms': 0.605, 'default': {'kl': 0.015851590782403946, 'cur_lr': 4.999999873689376e-05, 'entropy': 16.239519119262695, 'total_loss': 147.88504028320312, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13959604501724243, 'vf_explained_var': 0.9218350648880005, 'vf_loss': 148.0085906982422}, 'grad_time_ms': 717.604}",3934253,13269.655487060547,-172.38800804952464,cda-server-6,24,-228.24084146483688,{},2592,10.157.146.6,{},-154.05940271714744,0,1200,2025-08-29_18-17-41,108,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756484261,50.0,129600,13269.655487060547,95.95671343803406,108
+130800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 117926.066, 'num_steps_sampled': 130800, 'update_time_ms': 2.534, 'num_steps_trained': 130800, 'load_time_ms': 0.605, 'default': {'kl': 0.017764806747436523, 'cur_lr': 4.999999873689376e-05, 'entropy': 16.124168395996094, 'total_loss': 137.75177001953125, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1361183226108551, 'vf_explained_var': 0.9212970733642578, 'vf_loss': 137.86990356445312}, 'grad_time_ms': 713.76}",3934253,13412.86143398285,-170.3833210749433,cda-server-6,24,-228.24084146483688,{},2616,10.157.146.6,{},-152.87937694663307,0,1200,2025-08-29_18-20-04,109,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756484404,50.0,130800,13412.86143398285,143.20594692230225,109
+132000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 115004.932, 'num_steps_sampled': 132000, 'update_time_ms': 2.5, 'num_steps_trained': 132000, 'load_time_ms': 0.606, 'default': {'kl': 0.01462532114237547, 'cur_lr': 4.999999873689376e-05, 'entropy': 16.235450744628906, 'total_loss': 312.9838562011719, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12912686169147491, 'vf_explained_var': 0.8360607624053955, 'vf_loss': 313.09820556640625}, 'grad_time_ms': 720.145}",3934253,13508.183268070221,-170.65942585523808,cda-server-6,24,-235.71808497253244,{},2640,10.157.146.6,{},-152.87937694663307,0,1200,2025-08-29_18-21-40,110,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756484500,50.0,132000,13508.183268070221,95.32183408737183,110
+133200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 110919.135, 'num_steps_sampled': 133200, 'update_time_ms': 2.516, 'num_steps_trained': 133200, 'load_time_ms': 0.602, 'default': {'kl': 0.015565955080091953, 'cur_lr': 4.999999873689376e-05, 'entropy': 16.11193084716797, 'total_loss': 167.30422973632812, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12671928107738495, 'vf_explained_var': 0.8889510035514832, 'vf_loss': 167.4152069091797}, 'grad_time_ms': 730.481}",3934253,13593.992814540863,-169.53994936434026,cda-server-6,24,-235.71808497253244,{},2664,10.157.146.6,{},-151.91960658986196,0,1200,2025-08-29_18-23-06,111,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756484586,50.0,133200,13593.992814540863,85.80954647064209,111
+134400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 106473.289, 'num_steps_sampled': 134400, 'update_time_ms': 2.471, 'num_steps_trained': 134400, 'load_time_ms': 0.613, 'default': {'kl': 0.016392739489674568, 'cur_lr': 4.999999873689376e-05, 'entropy': 16.099382400512695, 'total_loss': 139.86541748046875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13682633638381958, 'vf_explained_var': 0.9135033488273621, 'vf_loss': 139.98565673828125}, 'grad_time_ms': 731.735}",3934253,13705.056573867798,-169.04459473864682,cda-server-6,24,-235.71808497253244,{},2688,10.157.146.6,{},-151.91960658986196,0,1200,2025-08-29_18-24-57,112,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756484697,50.0,134400,13705.056573867798,111.06375932693481,112
+135600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 110040.376, 'num_steps_sampled': 135600, 'update_time_ms': 2.501, 'num_steps_trained': 135600, 'load_time_ms': 0.617, 'default': {'kl': 0.013566691428422928, 'cur_lr': 4.999999873689376e-05, 'entropy': 16.00737762451172, 'total_loss': 143.7192840576172, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10995927453041077, 'vf_explained_var': 0.9082484841346741, 'vf_loss': 143.8155059814453}, 'grad_time_ms': 731.457}",3934253,13828.311593294144,-167.68415176884224,cda-server-6,24,-235.71808497253244,{},2712,10.157.146.6,{},-151.91960658986196,0,1200,2025-08-29_18-27-00,113,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756484820,50.0,135600,13828.311593294144,123.25501942634583,113
+136800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 111408.427, 'num_steps_sampled': 136800, 'update_time_ms': 2.489, 'num_steps_trained': 136800, 'load_time_ms': 0.623, 'default': {'kl': 0.01726832240819931, 'cur_lr': 4.999999873689376e-05, 'entropy': 16.0635929107666, 'total_loss': 108.0864486694336, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.15256041288375854, 'vf_explained_var': 0.9264135360717773, 'vf_loss': 108.22151947021484}, 'grad_time_ms': 730.998}",3934253,13942.116770505905,-164.90202950385196,cda-server-6,24,-210.5466717526865,{},2736,10.157.146.6,{},-151.91960658986196,0,1200,2025-08-29_18-28-54,114,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756484934,50.0,136800,13942.116770505905,113.80517721176147,114
+138000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 108091.929, 'num_steps_sampled': 138000, 'update_time_ms': 2.5, 'num_steps_trained': 138000, 'load_time_ms': 0.642, 'default': {'kl': 0.015876974910497665, 'cur_lr': 4.999999873689376e-05, 'entropy': 16.05762481689453, 'total_loss': 195.14218139648438, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13380476832389832, 'vf_explained_var': 0.8996444344520569, 'vf_loss': 195.25990295410156}, 'grad_time_ms': 738.229}",3934253,14041.634573221207,-165.66463873140276,cda-server-6,24,-210.5466717526865,{},2760,10.157.146.6,{},-151.9062574171948,0,1200,2025-08-29_18-30-33,115,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756485033,50.0,138000,14041.634573221207,99.51780271530151,115
+139200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 108473.617, 'num_steps_sampled': 139200, 'update_time_ms': 2.427, 'num_steps_trained': 139200, 'load_time_ms': 0.652, 'default': {'kl': 0.014280934818089008, 'cur_lr': 4.999999873689376e-05, 'entropy': 15.890507698059082, 'total_loss': 205.2573699951172, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12453499436378479, 'vf_explained_var': 0.874573826789856, 'vf_loss': 205.367431640625}, 'grad_time_ms': 727.677}",3934253,14144.893615484238,-164.95825059903262,cda-server-6,24,-223.89982514164038,{},2784,10.157.146.6,{},-145.5091252897312,0,1200,2025-08-29_18-32-17,116,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756485137,50.0,139200,14144.893615484238,103.259042263031,116
+140400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 106538.732, 'num_steps_sampled': 140400, 'update_time_ms': 2.408, 'num_steps_trained': 140400, 'load_time_ms': 0.652, 'default': {'kl': 0.015535826794803143, 'cur_lr': 4.999999873689376e-05, 'entropy': 15.938193321228027, 'total_loss': 172.31068420410156, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1428217738866806, 'vf_explained_var': 0.8901649117469788, 'vf_loss': 172.43780517578125}, 'grad_time_ms': 733.555}",3934253,14246.504431962967,-165.22754313461462,cda-server-6,24,-223.89982514164038,{},2808,10.157.146.6,{},-142.7293238662343,0,1200,2025-08-29_18-33-58,117,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756485238,50.0,140400,14246.504431962967,101.61081647872925,117
+141600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 109051.889, 'num_steps_sampled': 141600, 'update_time_ms': 2.398, 'num_steps_trained': 141600, 'load_time_ms': 0.679, 'default': {'kl': 0.015633488073945045, 'cur_lr': 4.999999873689376e-05, 'entropy': 15.816776275634766, 'total_loss': 69.72467803955078, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14318110048770905, 'vf_explained_var': 0.9485836029052734, 'vf_loss': 69.85203552246094}, 'grad_time_ms': 735.071}",3934253,14367.609112024307,-164.35507615599417,cda-server-6,24,-223.89982514164038,{},2832,10.157.146.6,{},-142.7293238662343,0,1200,2025-08-29_18-35-59,118,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756485359,50.0,141600,14367.609112024307,121.10468006134033,118
+142800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 104404.834, 'num_steps_sampled': 142800, 'update_time_ms': 2.474, 'num_steps_trained': 142800, 'load_time_ms': 0.681, 'default': {'kl': 0.016464034095406532, 'cur_lr': 4.999999873689376e-05, 'entropy': 15.754087448120117, 'total_loss': 113.65615844726562, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13386313617229462, 'vf_explained_var': 0.9267792105674744, 'vf_loss': 113.77334594726562}, 'grad_time_ms': 744.731}",3934253,14464.442219495773,-163.03480213112596,cda-server-6,24,-223.89982514164038,{},2856,10.157.146.6,{},-142.7293238662343,0,1200,2025-08-29_18-37-36,119,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756485456,50.0,142800,14464.442219495773,96.83310747146606,119
+144000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 106612.481, 'num_steps_sampled': 144000, 'update_time_ms': 2.491, 'num_steps_trained': 144000, 'load_time_ms': 0.717, 'default': {'kl': 0.016114315018057823, 'cur_lr': 4.999999873689376e-05, 'entropy': 15.789478302001953, 'total_loss': 94.16368865966797, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1432938277721405, 'vf_explained_var': 0.9434927701950073, 'vf_loss': 94.2906723022461}, 'grad_time_ms': 744.017}",3934253,14581.834088563919,-162.69901184530545,cda-server-6,24,-227.78725353717078,{},2880,10.157.146.6,{},-142.7293238662343,0,1200,2025-08-29_18-39-34,120,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756485574,50.0,144000,14581.834088563919,117.39186906814575,120
+145200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 108981.101, 'num_steps_sampled': 145200, 'update_time_ms': 2.477, 'num_steps_trained': 145200, 'load_time_ms': 0.725, 'default': {'kl': 0.015237444080412388, 'cur_lr': 4.999999873689376e-05, 'entropy': 15.758185386657715, 'total_loss': 102.03178405761719, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13658057153224945, 'vf_explained_var': 0.9302859902381897, 'vf_loss': 102.15293884277344}, 'grad_time_ms': 741.072}",3934253,14691.300345897675,-162.34126236260016,cda-server-6,24,-227.78725353717078,{},2904,10.157.146.6,{},-142.7293238662343,0,1200,2025-08-29_18-41-23,121,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756485683,50.0,145200,14691.300345897675,109.4662573337555,121
+146400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 107680.629, 'num_steps_sampled': 146400, 'update_time_ms': 2.47, 'num_steps_trained': 146400, 'load_time_ms': 0.712, 'default': {'kl': 0.015177453868091106, 'cur_lr': 4.999999873689376e-05, 'entropy': 15.74573802947998, 'total_loss': 118.48878479003906, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13045638799667358, 'vf_explained_var': 0.9191161394119263, 'vf_loss': 118.60386657714844}, 'grad_time_ms': 752.242}",3934253,14789.470313310623,-162.87203130228417,cda-server-6,24,-227.78725353717078,{},2928,10.157.146.6,{},-152.73308602597515,0,1200,2025-08-29_18-43-01,122,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756485781,50.0,146400,14789.470313310623,98.16996741294861,122
+147600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 106273.709, 'num_steps_sampled': 147600, 'update_time_ms': 2.451, 'num_steps_trained': 147600, 'load_time_ms': 0.704, 'default': {'kl': 0.0166685301810503, 'cur_lr': 4.999999873689376e-05, 'entropy': 15.755717277526855, 'total_loss': 85.09651947021484, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13483008742332458, 'vf_explained_var': 0.94509357213974, 'vf_loss': 85.21446228027344}, 'grad_time_ms': 757.57}",3934253,14898.709458351135,-162.7500207409775,cda-server-6,24,-227.78725353717078,{},2952,10.157.146.6,{},-152.3745728662264,0,1200,2025-08-29_18-44-51,123,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756485891,50.0,147600,14898.709458351135,109.23914504051208,123
+148800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 105721.462, 'num_steps_sampled': 148800, 'update_time_ms': 2.425, 'num_steps_trained': 148800, 'load_time_ms': 0.702, 'default': {'kl': 0.016147322952747345, 'cur_lr': 4.999999873689376e-05, 'entropy': 15.723305702209473, 'total_loss': 119.27034759521484, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13226349651813507, 'vf_explained_var': 0.9263350963592529, 'vf_loss': 119.38626098632812}, 'grad_time_ms': 760.658}",3934253,15007.022426128387,-162.13066795735972,cda-server-6,24,-222.88002538887568,{},2976,10.157.146.6,{},-152.3745728662264,0,1200,2025-08-29_18-46-39,124,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756485999,50.0,148800,15007.022426128387,108.3129677772522,124
+150000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 105027.21, 'num_steps_sampled': 150000, 'update_time_ms': 2.406, 'num_steps_trained': 150000, 'load_time_ms': 0.684, 'default': {'kl': 0.016684727743268013, 'cur_lr': 4.999999873689376e-05, 'entropy': 15.590709686279297, 'total_loss': 77.29227447509766, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.15790709853172302, 'vf_explained_var': 0.9456202387809753, 'vf_loss': 77.43329620361328}, 'grad_time_ms': 739.797}",3934253,15099.387891292572,-161.97012023780732,cda-server-6,24,-196.12841532848358,{},3000,10.157.146.6,{},-152.3745728662264,0,1200,2025-08-29_18-48-11,125,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756486091,50.0,150000,15099.387891292572,92.36546516418457,125
+151200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 104640.818, 'num_steps_sampled': 151200, 'update_time_ms': 2.413, 'num_steps_trained': 151200, 'load_time_ms': 0.675, 'default': {'kl': 0.015469375997781754, 'cur_lr': 4.999999873689376e-05, 'entropy': 15.370908737182617, 'total_loss': 56.76454162597656, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.134979248046875, 'vf_explained_var': 0.9565190076828003, 'vf_loss': 56.88386154174805}, 'grad_time_ms': 736.857}",3934253,15198.75416469574,-161.44737111172932,cda-server-6,24,-196.12841532848358,{},3024,10.157.146.6,{},-152.36681112874857,0,1200,2025-08-29_18-49-51,126,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756486191,50.0,151200,15198.75416469574,99.36627340316772,126
+152400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 105887.097, 'num_steps_sampled': 152400, 'update_time_ms': 2.475, 'num_steps_trained': 152400, 'load_time_ms': 0.681, 'default': {'kl': 0.017822375521063805, 'cur_lr': 4.999999873689376e-05, 'entropy': 15.559758186340332, 'total_loss': 93.37821197509766, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.15308107435703278, 'vf_explained_var': 0.9373614192008972, 'vf_loss': 93.51325225830078}, 'grad_time_ms': 745.519}",3934253,15312.915374994278,-160.98241869124263,cda-server-6,24,-196.12841532848358,{},3048,10.157.146.6,{},-152.04356348579236,0,1200,2025-08-29_18-51-45,127,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756486305,50.0,152400,15312.915374994278,114.16121029853821,127
+153600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 104508.826, 'num_steps_sampled': 153600, 'update_time_ms': 2.481, 'num_steps_trained': 153600, 'load_time_ms': 0.652, 'default': {'kl': 0.01617765799164772, 'cur_lr': 4.999999873689376e-05, 'entropy': 15.50704574584961, 'total_loss': 83.95415496826172, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1417163461446762, 'vf_explained_var': 0.9401677250862122, 'vf_loss': 84.07949829101562}, 'grad_time_ms': 749.997}",3934253,15420.28134059906,-160.89652670146586,cda-server-6,24,-196.12841532848358,{},3072,10.157.146.6,{},-149.96283505629324,0,1200,2025-08-29_18-53-32,128,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756486412,50.0,153600,15420.28134059906,107.3659656047821,128
+154800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 102807.243, 'num_steps_sampled': 154800, 'update_time_ms': 2.451, 'num_steps_trained': 154800, 'load_time_ms': 0.655, 'default': {'kl': 0.012704680673778057, 'cur_lr': 4.999999873689376e-05, 'entropy': 15.458696365356445, 'total_loss': 92.21479034423828, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1063886359333992, 'vf_explained_var': 0.9457715153694153, 'vf_loss': 92.30831909179688}, 'grad_time_ms': 746.283}",3934253,15500.06122136116,-160.04755913315933,cda-server-6,24,-196.3103197721101,{},3096,10.157.146.6,{},-149.96283505629324,0,1200,2025-08-29_18-54-52,129,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756486492,50.0,154800,15500.06122136116,79.77988076210022,129
+156000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 103082.504, 'num_steps_sampled': 156000, 'update_time_ms': 2.458, 'num_steps_trained': 156000, 'load_time_ms': 0.624, 'default': {'kl': 0.015438392758369446, 'cur_lr': 4.999999873689376e-05, 'entropy': 15.547718048095703, 'total_loss': 95.83563232421875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11330587416887283, 'vf_explained_var': 0.9335554838180542, 'vf_loss': 95.93331146240234}, 'grad_time_ms': 745.026}",3934253,15620.19240450859,-160.06733349064882,cda-server-6,24,-196.3103197721101,{},3120,10.157.146.6,{},-149.96283505629324,0,1200,2025-08-29_18-56-52,130,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756486612,50.0,156000,15620.19240450859,120.13118314743042,130
+157200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 102758.833, 'num_steps_sampled': 157200, 'update_time_ms': 2.466, 'num_steps_trained': 157200, 'load_time_ms': 0.615, 'default': {'kl': 0.01456800103187561, 'cur_lr': 4.999999873689376e-05, 'entropy': 15.418561935424805, 'total_loss': 153.66900634765625, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13157440721988678, 'vf_explained_var': 0.8986132740974426, 'vf_loss': 153.78582763671875}, 'grad_time_ms': 742.747}",3934253,15726.398941993713,-159.49411724190676,cda-server-6,24,-202.14320656776363,{},3144,10.157.146.6,{},-149.96283505629324,0,1200,2025-08-29_18-58-38,131,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756486718,50.0,157200,15726.398941993713,106.20653748512268,131
+158400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 103739.538, 'num_steps_sampled': 158400, 'update_time_ms': 2.501, 'num_steps_trained': 158400, 'load_time_ms': 0.614, 'default': {'kl': 0.016706252470612526, 'cur_lr': 4.999999873689376e-05, 'entropy': 15.317606925964355, 'total_loss': 98.88345336914062, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1393449604511261, 'vf_explained_var': 0.933607816696167, 'vf_loss': 99.0058822631836}, 'grad_time_ms': 710.562}",3934253,15834.054826974869,-159.85676489001088,cda-server-6,24,-202.14320656776363,{},3168,10.157.146.6,{},-150.4201484124871,0,1200,2025-08-29_19-00-26,132,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756486826,50.0,158400,15834.054826974869,107.6558849811554,132
+159600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 103062.235, 'num_steps_sampled': 159600, 'update_time_ms': 2.536, 'num_steps_trained': 159600, 'load_time_ms': 0.624, 'default': {'kl': 0.016824984923005104, 'cur_lr': 4.999999873689376e-05, 'entropy': 15.423134803771973, 'total_loss': 95.9524917602539, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12300290167331696, 'vf_explained_var': 0.9352494478225708, 'vf_loss': 96.05846405029297}, 'grad_time_ms': 700.816}",3934253,15936.423606872559,-159.00856716484094,cda-server-6,24,-202.14320656776363,{},3192,10.157.146.6,{},-147.035794824748,0,1200,2025-08-29_19-02-08,133,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756486928,50.0,159600,15936.423606872559,102.36877989768982,133
+160800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 104477.854, 'num_steps_sampled': 160800, 'update_time_ms': 2.555, 'num_steps_trained': 160800, 'load_time_ms': 0.651, 'default': {'kl': 0.01582499034702778, 'cur_lr': 4.999999873689376e-05, 'entropy': 15.176227569580078, 'total_loss': 150.65570068359375, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14522379636764526, 'vf_explained_var': 0.9030457735061646, 'vf_loss': 150.78488159179688}, 'grad_time_ms': 673.787}",3934253,16058.623097419739,-159.1646082147905,cda-server-6,24,-202.14320656776363,{},3216,10.157.146.6,{},-147.035794824748,0,1200,2025-08-29_19-04-11,134,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756487051,50.0,160800,16058.623097419739,122.19949054718018,134
+162000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 106125.728, 'num_steps_sampled': 162000, 'update_time_ms': 2.586, 'num_steps_trained': 162000, 'load_time_ms': 0.656, 'default': {'kl': 0.01506539061665535, 'cur_lr': 4.999999873689376e-05, 'entropy': 15.253995895385742, 'total_loss': 106.41146850585938, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11677607893943787, 'vf_explained_var': 0.9423614144325256, 'vf_loss': 106.51298522949219}, 'grad_time_ms': 696.908}",3934253,16167.699571847916,-158.73587543872088,cda-server-6,24,-193.33495906545753,{},3240,10.157.146.6,{},-147.035794824748,0,1200,2025-08-29_19-06-00,135,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756487160,50.0,162000,16167.699571847916,109.07647442817688,135
+163200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 106775.9, 'num_steps_sampled': 163200, 'update_time_ms': 2.556, 'num_steps_trained': 163200, 'load_time_ms': 0.653, 'default': {'kl': 0.014284864068031311, 'cur_lr': 4.999999873689376e-05, 'entropy': 15.149747848510742, 'total_loss': 78.15953063964844, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13005171716213226, 'vf_explained_var': 0.9475562572479248, 'vf_loss': 78.27511596679688}, 'grad_time_ms': 702.728}",3934253,16273.625362873077,-158.2200610019019,cda-server-6,24,-194.0735576508897,{},3264,10.157.146.6,{},-147.035794824748,0,1200,2025-08-29_19-07-46,136,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756487266,50.0,163200,16273.625362873077,105.92579102516174,136
+164400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 105439.688, 'num_steps_sampled': 164400, 'update_time_ms': 2.471, 'num_steps_trained': 164400, 'load_time_ms': 0.652, 'default': {'kl': 0.015326268039643764, 'cur_lr': 4.999999873689376e-05, 'entropy': 15.038931846618652, 'total_loss': 76.04287719726562, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1274718940258026, 'vf_explained_var': 0.9446787238121033, 'vf_loss': 76.15482330322266}, 'grad_time_ms': 704.662}",3934253,16374.442579507828,-158.5093182311461,cda-server-6,24,-194.0735576508897,{},3288,10.157.146.6,{},-151.32592374317068,0,1200,2025-08-29_19-09-26,137,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756487366,50.0,164400,16374.442579507828,100.81721663475037,137
+165600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 105544.13, 'num_steps_sampled': 165600, 'update_time_ms': 2.483, 'num_steps_trained': 165600, 'load_time_ms': 0.653, 'default': {'kl': 0.01429035235196352, 'cur_lr': 4.999999873689376e-05, 'entropy': 15.274619102478027, 'total_loss': 113.89822387695312, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.119236521422863, 'vf_explained_var': 0.9221948981285095, 'vf_loss': 114.00298309326172}, 'grad_time_ms': 691.319}",3934253,16482.721665859222,-158.44117571903706,cda-server-6,24,-195.68461275679073,{},3312,10.157.146.6,{},-151.32592374317068,0,1200,2025-08-29_19-11-15,138,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756487475,50.0,165600,16482.721665859222,108.27908635139465,138
+166800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 108306.367, 'num_steps_sampled': 166800, 'update_time_ms': 2.444, 'num_steps_trained': 166800, 'load_time_ms': 0.647, 'default': {'kl': 0.015218976885080338, 'cur_lr': 4.999999873689376e-05, 'entropy': 15.024642944335938, 'total_loss': 92.8395004272461, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12442895770072937, 'vf_explained_var': 0.9352640509605408, 'vf_loss': 92.94851684570312}, 'grad_time_ms': 685.966}",3934253,16590.070190668106,-158.71721453232985,cda-server-6,24,-195.68461275679073,{},3336,10.157.146.6,{},-146.55230270325862,0,1200,2025-08-29_19-13-02,139,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756487582,50.0,166800,16590.070190668106,107.34852480888367,139
+168000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 107962.214, 'num_steps_sampled': 168000, 'update_time_ms': 2.448, 'num_steps_trained': 168000, 'load_time_ms': 0.642, 'default': {'kl': 0.012888466008007526, 'cur_lr': 4.999999873689376e-05, 'entropy': 15.229165077209473, 'total_loss': 126.61551666259766, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11669489741325378, 'vf_explained_var': 0.9207143783569336, 'vf_loss': 126.71916961669922}, 'grad_time_ms': 678.166}",3934253,16706.68172430992,-159.03158914373972,cda-server-6,24,-209.50328456745822,{},3360,10.157.146.6,{},-141.81058536609197,0,1200,2025-08-29_19-14-59,140,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756487699,50.0,168000,16706.68172430992,116.61153364181519,140
+169200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 108109.572, 'num_steps_sampled': 169200, 'update_time_ms': 2.434, 'num_steps_trained': 169200, 'load_time_ms': 0.641, 'default': {'kl': 0.014938879758119583, 'cur_lr': 4.999999873689376e-05, 'entropy': 15.037114143371582, 'total_loss': 56.33360290527344, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12748420238494873, 'vf_explained_var': 0.9595043659210205, 'vf_loss': 56.44596862792969}, 'grad_time_ms': 681.575}",3934253,16814.394966363907,-158.5527541966109,cda-server-6,24,-209.50328456745822,{},3384,10.157.146.6,{},-141.81058536609197,0,1200,2025-08-29_19-16-46,141,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756487806,50.0,169200,16814.394966363907,107.7132420539856,141
+170400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 106676.728, 'num_steps_sampled': 170400, 'update_time_ms': 2.39, 'num_steps_trained': 170400, 'load_time_ms': 0.64, 'default': {'kl': 0.015551741234958172, 'cur_lr': 4.999999873689376e-05, 'entropy': 15.132568359375, 'total_loss': 95.65824890136719, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12652461230754852, 'vf_explained_var': 0.9271260499954224, 'vf_loss': 95.76902770996094}, 'grad_time_ms': 703.401}",3934253,16907.940058231354,-158.10055911942175,cda-server-6,24,-209.50328456745822,{},3408,10.157.146.6,{},-141.81058536609197,0,1200,2025-08-29_19-18-20,142,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756487900,50.0,170400,16907.940058231354,93.5450918674469,142
+171600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 106873.675, 'num_steps_sampled': 171600, 'update_time_ms': 2.316, 'num_steps_trained': 171600, 'load_time_ms': 0.638, 'default': {'kl': 0.01617261953651905, 'cur_lr': 4.999999873689376e-05, 'entropy': 15.013951301574707, 'total_loss': 65.23526000976562, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13598722219467163, 'vf_explained_var': 0.9518048763275146, 'vf_loss': 65.35486602783203}, 'grad_time_ms': 719.478}",3934253,17012.4382250309,-158.30730094810116,cda-server-6,24,-209.50328456745822,{},3432,10.157.146.6,{},-141.81058536609197,0,1200,2025-08-29_19-20-05,143,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756488005,50.0,171600,17012.4382250309,104.49816679954529,143
+172800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 106235.145, 'num_steps_sampled': 172800, 'update_time_ms': 2.349, 'num_steps_trained': 172800, 'load_time_ms': 0.622, 'default': {'kl': 0.015435976907610893, 'cur_lr': 4.999999873689376e-05, 'entropy': 15.000235557556152, 'total_loss': 62.51327896118164, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13944146037101746, 'vf_explained_var': 0.9515180587768555, 'vf_loss': 62.63710021972656}, 'grad_time_ms': 750.126}",3934253,17128.5585501194,-156.8989274949609,cda-server-6,24,-175.5255590819791,{},3456,10.157.146.6,{},-149.70738469206646,0,1200,2025-08-29_19-22-01,144,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756488121,50.0,172800,17128.5585501194,116.12032508850098,144
+174000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 105283.376, 'num_steps_sampled': 174000, 'update_time_ms': 2.302, 'num_steps_trained': 174000, 'load_time_ms': 0.619, 'default': {'kl': 0.014956353232264519, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.847784996032715, 'total_loss': 104.35578918457031, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12986616790294647, 'vf_explained_var': 0.9379231333732605, 'vf_loss': 104.47049713134766}, 'grad_time_ms': 733.99}",3934253,17227.954606294632,-156.9433194148839,cda-server-6,24,-187.17942537200705,{},3480,10.157.146.6,{},-149.70738469206646,0,1200,2025-08-29_19-23-40,145,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756488220,50.0,174000,17227.954606294632,99.39605617523193,145
+175200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 105196.062, 'num_steps_sampled': 175200, 'update_time_ms': 2.314, 'num_steps_trained': 175200, 'load_time_ms': 0.621, 'default': {'kl': 0.013884143903851509, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.91396713256836, 'total_loss': 66.7122802734375, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13310521841049194, 'vf_explained_var': 0.9542436599731445, 'vf_loss': 66.83132934570312}, 'grad_time_ms': 707.662}",3934253,17332.743657827377,-157.06993405255005,cda-server-6,24,-202.21004607666393,{},3504,10.157.146.6,{},-149.70738469206646,0,1200,2025-08-29_19-25-25,146,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756488325,50.0,175200,17332.743657827377,104.78905153274536,146
+176400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 105773.671, 'num_steps_sampled': 176400, 'update_time_ms': 2.328, 'num_steps_trained': 176400, 'load_time_ms': 0.616, 'default': {'kl': 0.015176494605839252, 'cur_lr': 4.999999873689376e-05, 'entropy': 15.008373260498047, 'total_loss': 79.77460479736328, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11993683874607086, 'vf_explained_var': 0.9480642676353455, 'vf_loss': 79.87918090820312}, 'grad_time_ms': 704.527}",3934253,17439.30501151085,-157.1401521045944,cda-server-6,24,-202.21004607666393,{},3528,10.157.146.6,{},-147.43059014043487,0,1200,2025-08-29_19-27-11,147,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756488431,50.0,176400,17439.30501151085,106.56135368347168,147
+177600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 102626.23, 'num_steps_sampled': 177600, 'update_time_ms': 2.288, 'num_steps_trained': 177600, 'load_time_ms': 0.614, 'default': {'kl': 0.016340788453817368, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.866175651550293, 'total_loss': 50.99203872680664, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.127852201461792, 'vf_explained_var': 0.9572170972824097, 'vf_loss': 51.10334777832031}, 'grad_time_ms': 713.616}",3934253,17516.197714090347,-156.9326692679125,cda-server-6,24,-202.21004607666393,{},3552,10.157.146.6,{},-145.1666515668931,0,1200,2025-08-29_19-28-28,148,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756488508,50.0,177600,17516.197714090347,76.89270257949829,148
+178800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 102912.524, 'num_steps_sampled': 178800, 'update_time_ms': 2.288, 'num_steps_trained': 178800, 'load_time_ms': 0.611, 'default': {'kl': 0.015348482877016068, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.877336502075195, 'total_loss': 92.67220306396484, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.15013960003852844, 'vf_explained_var': 0.9410419464111328, 'vf_loss': 92.80680847167969}, 'grad_time_ms': 722.665}",3934253,17626.49950647354,-158.18542591613408,cda-server-6,24,-232.173069817677,{},3576,10.157.146.6,{},-145.1666515668931,0,1200,2025-08-29_19-30-19,149,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756488619,50.0,178800,17626.49950647354,110.30179238319397,149
+180000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 103219.799, 'num_steps_sampled': 180000, 'update_time_ms': 2.304, 'num_steps_trained': 180000, 'load_time_ms': 0.611, 'default': {'kl': 0.014295445755124092, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.858844757080078, 'total_loss': 46.206031799316406, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11927267909049988, 'vf_explained_var': 0.9663113355636597, 'vf_loss': 46.310829162597656}, 'grad_time_ms': 728.864}",3934253,17746.24654841423,-158.00782030045582,cda-server-6,24,-232.173069817677,{},3600,10.157.146.6,{},-145.1666515668931,0,1200,2025-08-29_19-32-18,150,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756488738,50.0,180000,17746.24654841423,119.74704194068909,150
+181200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 103434.054, 'num_steps_sampled': 181200, 'update_time_ms': 2.36, 'num_steps_trained': 181200, 'load_time_ms': 0.615, 'default': {'kl': 0.015794552862644196, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.848892211914062, 'total_loss': 82.28297424316406, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12699836492538452, 'vf_explained_var': 0.9383652806282043, 'vf_loss': 82.39397430419922}, 'grad_time_ms': 718.235}",3934253,17855.997240543365,-157.82890956270467,cda-server-6,24,-232.173069817677,{},3624,10.157.146.6,{},-145.1666515668931,0,1200,2025-08-29_19-34-08,151,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756488848,50.0,181200,17855.997240543365,109.75069212913513,151
+182400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 106840.299, 'num_steps_sampled': 182400, 'update_time_ms': 2.391, 'num_steps_trained': 182400, 'load_time_ms': 0.621, 'default': {'kl': 0.01783747598528862, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.777881622314453, 'total_loss': 92.23494720458984, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1375618278980255, 'vf_explained_var': 0.9388156533241272, 'vf_loss': 92.35443878173828}, 'grad_time_ms': 703.231}",3934253,17983.454869747162,-157.61030282202955,cda-server-6,24,-232.173069817677,{},3648,10.157.146.6,{},-148.19687584877354,0,1200,2025-08-29_19-36-16,152,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756488976,50.0,182400,17983.454869747162,127.45762920379639,152
+183600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 107353.866, 'num_steps_sampled': 183600, 'update_time_ms': 2.486, 'num_steps_trained': 183600, 'load_time_ms': 0.649, 'default': {'kl': 0.014833658933639526, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.644444465637207, 'total_loss': 77.48524475097656, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12767963111400604, 'vf_explained_var': 0.9482372999191284, 'vf_loss': 77.597900390625}, 'grad_time_ms': 670.73}",3934253,18092.76464152336,-156.80859157196807,cda-server-6,24,-232.173069817677,{},3672,10.157.146.6,{},-147.6635856393042,0,1200,2025-08-29_19-38-05,153,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756489085,50.0,183600,18092.76464152336,109.30977177619934,153
+184800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 106945.08, 'num_steps_sampled': 184800, 'update_time_ms': 2.504, 'num_steps_trained': 184800, 'load_time_ms': 0.626, 'default': {'kl': 0.014095836319029331, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.716404914855957, 'total_loss': 50.62611770629883, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11327210813760757, 'vf_explained_var': 0.9626729488372803, 'vf_loss': 50.72511672973633}, 'grad_time_ms': 643.517}",3934253,18204.524663448334,-155.84383666926186,cda-server-6,24,-187.26780230902494,{},3696,10.157.146.6,{},-147.6635856393042,0,1200,2025-08-29_19-39-57,154,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756489197,50.0,184800,18204.524663448334,111.76002192497253,154
+186000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 106655.26, 'num_steps_sampled': 186000, 'update_time_ms': 2.491, 'num_steps_trained': 186000, 'load_time_ms': 0.629, 'default': {'kl': 0.014999334700405598, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.71993350982666, 'total_loss': 34.203369140625, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1288701295852661, 'vf_explained_var': 0.9726163148880005, 'vf_loss': 34.31705093383789}, 'grad_time_ms': 661.285}",3934253,18301.200717687607,-155.7455358243003,cda-server-6,24,-187.26780230902494,{},3720,10.157.146.6,{},-147.6635856393042,0,1200,2025-08-29_19-41-33,155,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756489293,50.0,186000,18301.200717687607,96.67605423927307,155
+187200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 106523.155, 'num_steps_sampled': 187200, 'update_time_ms': 2.514, 'num_steps_trained': 187200, 'load_time_ms': 0.636, 'default': {'kl': 0.01529185101389885, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.741146087646484, 'total_loss': 63.08943557739258, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13563477993011475, 'vf_explained_var': 0.9613681435585022, 'vf_loss': 63.20958709716797}, 'grad_time_ms': 687.403}",3934253,18404.930746793747,-156.05749131747933,cda-server-6,24,-187.26780230902494,{},3744,10.157.146.6,{},-147.6635856393042,0,1200,2025-08-29_19-43-17,156,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756489397,50.0,187200,18404.930746793747,103.73002910614014,156
+188400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 106234.606, 'num_steps_sampled': 188400, 'update_time_ms': 2.53, 'num_steps_trained': 188400, 'load_time_ms': 0.638, 'default': {'kl': 0.01576061360538006, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.539962768554688, 'total_loss': 51.49734878540039, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12901757657527924, 'vf_explained_var': 0.9635226726531982, 'vf_loss': 51.61040496826172}, 'grad_time_ms': 680.89}",3934253,18508.54259133339,-156.18697868414674,cda-server-6,24,-187.26780230902494,{},3768,10.157.146.6,{},-149.05326009298292,0,1200,2025-08-29_19-45-01,157,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756489501,50.0,188400,18508.54259133339,103.61184453964233,157
+189600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 110393.008, 'num_steps_sampled': 189600, 'update_time_ms': 2.544, 'num_steps_trained': 189600, 'load_time_ms': 0.639, 'default': {'kl': 0.015366671606898308, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.503620147705078, 'total_loss': 44.8004264831543, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1365458071231842, 'vf_explained_var': 0.96863853931427, 'vf_loss': 44.921409606933594}, 'grad_time_ms': 683.531}",3934253,18627.046046733856,-156.31557634699521,cda-server-6,24,-170.84411173980249,{},3792,10.157.146.6,{},-150.2876891507201,0,1200,2025-08-29_19-46-59,158,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756489619,50.0,189600,18627.046046733856,118.50345540046692,158
+190800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 111784.543, 'num_steps_sampled': 190800, 'update_time_ms': 2.52, 'num_steps_trained': 190800, 'load_time_ms': 0.64, 'default': {'kl': 0.015097062103450298, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.573460578918457, 'total_loss': 45.253807067871094, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12723152339458466, 'vf_explained_var': 0.9695051908493042, 'vf_loss': 45.36575698852539}, 'grad_time_ms': 680.895}",3934253,18751.23653268814,-156.06897775264233,cda-server-6,24,-170.84411173980249,{},3816,10.157.146.6,{},-142.36662116168563,0,1200,2025-08-29_19-49-04,159,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756489744,50.0,190800,18751.23653268814,124.19048595428467,159
+192000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 110586.829, 'num_steps_sampled': 192000, 'update_time_ms': 2.531, 'num_steps_trained': 192000, 'load_time_ms': 0.642, 'default': {'kl': 0.015620950609445572, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.42264175415039, 'total_loss': 44.00412368774414, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12604941427707672, 'vf_explained_var': 0.9683871865272522, 'vf_loss': 44.11436462402344}, 'grad_time_ms': 669.18}",3934253,18858.888377189636,-155.75848397267933,cda-server-6,24,-168.91110461407595,{},3840,10.157.146.6,{},-142.36662116168563,0,1200,2025-08-29_19-50-51,160,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756489851,50.0,192000,18858.888377189636,107.65184450149536,160
+193200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 111276.767, 'num_steps_sampled': 193200, 'update_time_ms': 2.516, 'num_steps_trained': 193200, 'load_time_ms': 0.647, 'default': {'kl': 0.014775075949728489, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.54749870300293, 'total_loss': 58.11050033569336, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1309185028076172, 'vf_explained_var': 0.9599100947380066, 'vf_loss': 58.22645950317383}, 'grad_time_ms': 689.36}",3934253,18975.739804506302,-155.97364649677118,cda-server-6,24,-192.82730108260392,{},3864,10.157.146.6,{},-142.36662116168563,0,1200,2025-08-29_19-52-48,161,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756489968,50.0,193200,18975.739804506302,116.85142731666565,161
+194400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 107563.772, 'num_steps_sampled': 194400, 'update_time_ms': 2.474, 'num_steps_trained': 194400, 'load_time_ms': 0.641, 'default': {'kl': 0.015671495348215103, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.508405685424805, 'total_loss': 40.06678009033203, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12871819734573364, 'vf_explained_var': 0.9688021540641785, 'vf_loss': 40.179630279541016}, 'grad_time_ms': 698.056}",3934253,19066.154118299484,-155.86087650370283,cda-server-6,24,-192.82730108260392,{},3888,10.157.146.6,{},-142.36662116168563,0,1200,2025-08-29_19-54-19,162,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756490059,50.0,194400,19066.154118299484,90.41431379318237,162
+195600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 107835.158, 'num_steps_sampled': 195600, 'update_time_ms': 2.403, 'num_steps_trained': 195600, 'load_time_ms': 0.609, 'default': {'kl': 0.01576964743435383, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.222159385681152, 'total_loss': 38.615726470947266, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12422147393226624, 'vf_explained_var': 0.972466766834259, 'vf_loss': 38.72397994995117}, 'grad_time_ms': 734.539}",3934253,19178.542206048965,-155.77523854605596,cda-server-6,24,-192.82730108260392,{},3912,10.157.146.6,{},-150.75634943073578,0,1200,2025-08-29_19-56-11,163,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756490171,50.0,195600,19178.542206048965,112.3880877494812,163
+196800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 105948.854, 'num_steps_sampled': 196800, 'update_time_ms': 2.353, 'num_steps_trained': 196800, 'load_time_ms': 0.613, 'default': {'kl': 0.01632661558687687, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.407248497009277, 'total_loss': 66.69595336914062, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1468474119901657, 'vf_explained_var': 0.9592094421386719, 'vf_loss': 66.82626342773438}, 'grad_time_ms': 758.658}",3934253,19271.680288791656,-155.52057609509816,cda-server-6,24,-192.82730108260392,{},3936,10.157.146.6,{},-150.59467953216102,0,1200,2025-08-29_19-57-44,164,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756490264,50.0,196800,19271.680288791656,93.13808274269104,164
+198000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 106547.882, 'num_steps_sampled': 198000, 'update_time_ms': 2.368, 'num_steps_trained': 198000, 'load_time_ms': 0.612, 'default': {'kl': 0.014598803594708443, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.533857345581055, 'total_loss': 43.06224060058594, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12842096388339996, 'vf_explained_var': 0.9679848551750183, 'vf_loss': 43.17587661743164}, 'grad_time_ms': 757.885}",3934253,19374.33864402771,-155.1683275163884,cda-server-6,24,-186.20441058789976,{},3960,10.157.146.6,{},-149.0035912566383,0,1200,2025-08-29_19-59-27,165,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756490367,50.0,198000,19374.33864402771,102.65835523605347,165
+199200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 105139.279, 'num_steps_sampled': 199200, 'update_time_ms': 2.332, 'num_steps_trained': 199200, 'load_time_ms': 0.608, 'default': {'kl': 0.014788919128477573, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.377288818359375, 'total_loss': 48.58959197998047, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12106100469827652, 'vf_explained_var': 0.9630370736122131, 'vf_loss': 48.6956787109375}, 'grad_time_ms': 758.957}",3934253,19463.992821216583,-154.9477786673612,cda-server-6,24,-186.20441058789976,{},3984,10.157.146.6,{},-139.68382772036009,0,1200,2025-08-29_20-00-56,166,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756490456,50.0,199200,19463.992821216583,89.65417718887329,166
+200400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 107294.494, 'num_steps_sampled': 200400, 'update_time_ms': 2.31, 'num_steps_trained': 200400, 'load_time_ms': 0.607, 'default': {'kl': 0.015912381932139397, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.272615432739258, 'total_loss': 42.7900390625, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13069111108779907, 'vf_explained_var': 0.9659023284912109, 'vf_loss': 42.90461730957031}, 'grad_time_ms': 754.259}",3934253,19589.108632087708,-155.09633164691525,cda-server-6,24,-186.20441058789976,{},4008,10.157.146.6,{},-139.68382772036009,0,1200,2025-08-29_20-03-02,167,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756490582,50.0,200400,19589.108632087708,125.11581087112427,167
+201600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 105697.83, 'num_steps_sampled': 201600, 'update_time_ms': 2.295, 'num_steps_trained': 201600, 'load_time_ms': 0.607, 'default': {'kl': 0.014985193498432636, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.434755325317383, 'total_loss': 35.87843322753906, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13099879026412964, 'vf_explained_var': 0.9744190573692322, 'vf_loss': 35.994258880615234}, 'grad_time_ms': 757.804}",3934253,19691.680990934372,-155.6347589901296,cda-server-6,24,-186.20441058789976,{},4032,10.157.146.6,{},-138.59291754226575,0,1200,2025-08-29_20-04-44,168,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756490684,50.0,201600,19691.680990934372,102.57235884666443,168
+202800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 102117.547, 'num_steps_sampled': 202800, 'update_time_ms': 2.31, 'num_steps_trained': 202800, 'load_time_ms': 0.623, 'default': {'kl': 0.015808604657649994, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.588302612304688, 'total_loss': 55.970699310302734, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13252593576908112, 'vf_explained_var': 0.9560667872428894, 'vf_loss': 56.08721923828125}, 'grad_time_ms': 754.829}",3934253,19780.03944683075,-155.08221493769696,cda-server-6,24,-169.66817220868816,{},4056,10.157.146.6,{},-138.59291754226575,0,1200,2025-08-29_20-06-13,169,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756490773,50.0,202800,19780.03944683075,88.35845589637756,169
+204000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 101115.095, 'num_steps_sampled': 204000, 'update_time_ms': 2.281, 'num_steps_trained': 204000, 'load_time_ms': 0.621, 'default': {'kl': 0.01640515774488449, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.44264030456543, 'total_loss': 33.83430099487305, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12805338203907013, 'vf_explained_var': 0.9746472835540771, 'vf_loss': 33.94574737548828}, 'grad_time_ms': 776.615}",3934253,19877.884481191635,-155.26188435914753,cda-server-6,24,-169.66817220868816,{},4080,10.157.146.6,{},-138.59291754226575,0,1200,2025-08-29_20-07-50,170,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756490870,50.0,204000,19877.884481191635,97.84503436088562,170
+205200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 98892.183, 'num_steps_sampled': 205200, 'update_time_ms': 2.217, 'num_steps_trained': 205200, 'load_time_ms': 0.621, 'default': {'kl': 0.015521807596087456, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.46370792388916, 'total_loss': 60.55887222290039, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1447797566652298, 'vf_explained_var': 0.9552225470542908, 'vf_loss': 60.68794250488281}, 'grad_time_ms': 763.954}",3934253,19972.380245923996,-155.33247669421817,cda-server-6,24,-171.77568078754396,{},4104,10.157.146.6,{},-138.59291754226575,0,1200,2025-08-29_20-09-25,171,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756490965,50.0,205200,19972.380245923996,94.49576473236084,171
+206400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 100614.225, 'num_steps_sampled': 206400, 'update_time_ms': 2.248, 'num_steps_trained': 206400, 'load_time_ms': 0.622, 'default': {'kl': 0.015789611265063286, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.3331298828125, 'total_loss': 48.4068717956543, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13671469688415527, 'vf_explained_var': 0.9638553261756897, 'vf_loss': 48.5275993347168}, 'grad_time_ms': 775.318}",3934253,20080.128808498383,-155.01792419325568,cda-server-6,24,-171.77568078754396,{},4128,10.157.146.6,{},-144.12471496163798,0,1200,2025-08-29_20-11-13,172,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756491073,50.0,206400,20080.128808498383,107.7485625743866,172
+207600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 98688.004, 'num_steps_sampled': 207600, 'update_time_ms': 2.24, 'num_steps_trained': 207600, 'load_time_ms': 0.622, 'default': {'kl': 0.014612250961363316, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.156961441040039, 'total_loss': 42.88018035888672, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11420790106058121, 'vf_explained_var': 0.9699710011482239, 'vf_loss': 42.97959899902344}, 'grad_time_ms': 761.07}",3934253,20173.1121134758,-155.37836022919845,cda-server-6,24,-171.77568078754396,{},4152,10.157.146.6,{},-144.12471496163798,0,1200,2025-08-29_20-12-46,173,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756491166,50.0,207600,20173.1121134758,92.98330497741699,173
+208800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 99523.342, 'num_steps_sampled': 208800, 'update_time_ms': 2.206, 'num_steps_trained': 208800, 'load_time_ms': 0.621, 'default': {'kl': 0.014946643263101578, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.175942420959473, 'total_loss': 57.44399642944336, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13504861295223236, 'vf_explained_var': 0.9574512839317322, 'vf_loss': 57.56391143798828}, 'grad_time_ms': 765.866}",3934253,20274.651047468185,-155.56842982858154,cda-server-6,24,-185.3305242842277,{},4176,10.157.146.6,{},-144.12471496163798,0,1200,2025-08-29_20-14-27,174,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756491267,50.0,208800,20274.651047468185,101.53893399238586,174
+210000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 98719.862, 'num_steps_sampled': 210000, 'update_time_ms': 2.231, 'num_steps_trained': 210000, 'load_time_ms': 0.622, 'default': {'kl': 0.014225161634385586, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.324098587036133, 'total_loss': 59.64179229736328, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12911520898342133, 'vf_explained_var': 0.9586576819419861, 'vf_loss': 59.75650405883789}, 'grad_time_ms': 739.584}",3934253,20369.012630462646,-155.78960833378852,cda-server-6,24,-185.3305242842277,{},4200,10.157.146.6,{},-144.12471496163798,0,1200,2025-08-29_20-16-02,175,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756491362,50.0,210000,20369.012630462646,94.36158299446106,175
+211200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 98927.785, 'num_steps_sampled': 211200, 'update_time_ms': 2.238, 'num_steps_trained': 211200, 'load_time_ms': 0.621, 'default': {'kl': 0.015747396275401115, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.27517032623291, 'total_loss': 35.0083122253418, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.137986421585083, 'vf_explained_var': 0.9727128148078918, 'vf_loss': 35.13035202026367}, 'grad_time_ms': 746.11}",3934253,20460.81170296669,-155.69055132115284,cda-server-6,24,-185.3305242842277,{},4224,10.157.146.6,{},-148.52867451145093,0,1200,2025-08-29_20-17-33,176,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756491453,50.0,211200,20460.81170296669,91.79907250404358,176
+212400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94410.29, 'num_steps_sampled': 212400, 'update_time_ms': 2.289, 'num_steps_trained': 212400, 'load_time_ms': 0.621, 'default': {'kl': 0.01412759255617857, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.326051712036133, 'total_loss': 44.30991744995117, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12913967669010162, 'vf_explained_var': 0.9653099775314331, 'vf_loss': 44.42475891113281}, 'grad_time_ms': 758.482}",3934253,20540.877601861954,-155.4291076016693,cda-server-6,24,-185.3305242842277,{},4248,10.157.146.6,{},-148.52867451145093,0,1200,2025-08-29_20-18-53,177,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756491533,50.0,212400,20540.877601861954,80.06589889526367,177
+213600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94658.357, 'num_steps_sampled': 213600, 'update_time_ms': 2.329, 'num_steps_trained': 213600, 'load_time_ms': 0.617, 'default': {'kl': 0.014271329157054424, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.212542533874512, 'total_loss': 57.12761688232422, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12536929547786713, 'vf_explained_var': 0.9578894376754761, 'vf_loss': 57.23854064941406}, 'grad_time_ms': 738.686}",3934253,20645.73306274414,-155.17733970045182,cda-server-6,24,-175.80811120532408,{},4272,10.157.146.6,{},-148.15369681094623,0,1200,2025-08-29_20-20-38,178,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756491638,50.0,213600,20645.73306274414,104.85546088218689,178
+214800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94257.458, 'num_steps_sampled': 214800, 'update_time_ms': 2.356, 'num_steps_trained': 214800, 'load_time_ms': 0.602, 'default': {'kl': 0.014027887023985386, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.08896255493164, 'total_loss': 54.14302062988281, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13360297679901123, 'vf_explained_var': 0.9601472020149231, 'vf_loss': 54.26241683959961}, 'grad_time_ms': 753.871}",3934253,20730.234143018723,-154.9975954160479,cda-server-6,24,-174.65220154558435,{},4296,10.157.146.6,{},-145.09918014006897,0,1200,2025-08-29_20-22-03,179,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756491723,50.0,214800,20730.234143018723,84.50108027458191,179
+216000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94974.4, 'num_steps_sampled': 216000, 'update_time_ms': 2.376, 'num_steps_trained': 216000, 'load_time_ms': 0.604, 'default': {'kl': 0.015153449028730392, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.264097213745117, 'total_loss': 61.21241760253906, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13657930493354797, 'vf_explained_var': 0.9583113789558411, 'vf_loss': 61.33365249633789}, 'grad_time_ms': 739.851}",3934253,20835.10924553871,-155.41010977496163,cda-server-6,24,-187.8065284956767,{},4320,10.157.146.6,{},-143.37941258015238,0,1200,2025-08-29_20-23-48,180,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756491828,50.0,216000,20835.10924553871,104.87510251998901,180
+217200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93982.285, 'num_steps_sampled': 217200, 'update_time_ms': 2.417, 'num_steps_trained': 217200, 'load_time_ms': 0.606, 'default': {'kl': 0.016186289489269257, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.127467155456543, 'total_loss': 64.62361145019531, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13219882547855377, 'vf_explained_var': 0.9601544141769409, 'vf_loss': 64.73941040039062}, 'grad_time_ms': 749.797}",3934253,20919.783405065536,-155.7737927614949,cda-server-6,24,-187.8065284956767,{},4344,10.157.146.6,{},-143.37941258015238,0,1200,2025-08-29_20-25-12,181,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756491912,50.0,217200,20919.783405065536,84.67415952682495,181
+218400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93673.265, 'num_steps_sampled': 218400, 'update_time_ms': 2.428, 'num_steps_trained': 218400, 'load_time_ms': 0.612, 'default': {'kl': 0.016756556928157806, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.09090805053711, 'total_loss': 32.69347381591797, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13112007081508636, 'vf_explained_var': 0.9762242436408997, 'vf_loss': 32.8076286315918}, 'grad_time_ms': 753.985}",3934253,21024.484308958054,-155.6975634315122,cda-server-6,24,-187.8065284956767,{},4368,10.157.146.6,{},-143.37941258015238,0,1200,2025-08-29_20-26-57,182,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756492017,50.0,218400,21024.484308958054,104.70090389251709,182
+219600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 96073.239, 'num_steps_sampled': 219600, 'update_time_ms': 2.455, 'num_steps_trained': 219600, 'load_time_ms': 0.609, 'default': {'kl': 0.014003668911755085, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.073108673095703, 'total_loss': 43.03472137451172, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12060434371232986, 'vf_explained_var': 0.9658010005950928, 'vf_loss': 43.14114761352539}, 'grad_time_ms': 756.764}",3934253,21141.494768619537,-155.79708932386183,cda-server-6,24,-187.8065284956767,{},4392,10.157.146.6,{},-143.37941258015238,0,1200,2025-08-29_20-28-54,183,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756492134,50.0,219600,21141.494768619537,117.01045966148376,183
+220800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93381.741, 'num_steps_sampled': 220800, 'update_time_ms': 2.479, 'num_steps_trained': 220800, 'load_time_ms': 0.607, 'default': {'kl': 0.015979474410414696, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.951452255249023, 'total_loss': 28.970035552978516, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13051539659500122, 'vf_explained_var': 0.9753679633140564, 'vf_loss': 29.084373474121094}, 'grad_time_ms': 750.692}",3934253,21216.059225797653,-155.2159485927954,cda-server-6,24,-174.74615890433003,{},4416,10.157.146.6,{},-149.9222426574402,0,1200,2025-08-29_20-30-09,184,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756492209,50.0,220800,21216.059225797653,74.56445717811584,184
+222000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 96436.116, 'num_steps_sampled': 222000, 'update_time_ms': 2.503, 'num_steps_trained': 222000, 'load_time_ms': 0.606, 'default': {'kl': 0.015772182494401932, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.0059232711792, 'total_loss': 60.55994415283203, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1361880898475647, 'vf_explained_var': 0.9553515315055847, 'vf_loss': 60.68016052246094}, 'grad_time_ms': 762.586}",3934253,21341.083225011826,-154.90547833395576,cda-server-6,24,-180.30431492076218,{},4440,10.157.146.6,{},-139.678197164373,0,1200,2025-08-29_20-32-14,185,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756492334,50.0,222000,21341.083225011826,125.02399921417236,185
+223200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95140.264, 'num_steps_sampled': 223200, 'update_time_ms': 2.507, 'num_steps_trained': 223200, 'load_time_ms': 0.605, 'default': {'kl': 0.015124676749110222, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.924979209899902, 'total_loss': 37.755821228027344, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13534162938594818, 'vf_explained_var': 0.9738060832023621, 'vf_loss': 37.87584686279297}, 'grad_time_ms': 752.513}",3934253,21419.822728157043,-154.85165366221167,cda-server-6,24,-180.30431492076218,{},4464,10.157.146.6,{},-139.678197164373,0,1200,2025-08-29_20-33-33,186,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756492413,50.0,223200,21419.822728157043,78.7395031452179,186
+224400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 97230.341, 'num_steps_sampled': 224400, 'update_time_ms': 2.447, 'num_steps_trained': 224400, 'load_time_ms': 0.603, 'default': {'kl': 0.01550869271159172, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.100944519042969, 'total_loss': 33.599151611328125, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1279132217168808, 'vf_explained_var': 0.9754032492637634, 'vf_loss': 33.711360931396484}, 'grad_time_ms': 745.508}",3934253,21520.718727827072,-154.45393718739763,cda-server-6,24,-180.30431492076218,{},4488,10.157.146.6,{},-139.678197164373,0,1200,2025-08-29_20-35-13,187,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756492513,50.0,224400,21520.718727827072,100.89599967002869,187
+225600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 96492.054, 'num_steps_sampled': 225600, 'update_time_ms': 2.439, 'num_steps_trained': 225600, 'load_time_ms': 0.606, 'default': {'kl': 0.015070527791976929, 'cur_lr': 4.999999873689376e-05, 'entropy': 14.055828094482422, 'total_loss': 31.14375114440918, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1401119828224182, 'vf_explained_var': 0.9760143756866455, 'vf_loss': 31.26860237121582}, 'grad_time_ms': 770.442}",3934253,21618.440562963486,-154.46611208847494,cda-server-6,24,-180.30431492076218,{},4512,10.157.146.6,{},-139.678197164373,0,1200,2025-08-29_20-36-51,188,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756492611,50.0,225600,21618.440562963486,97.72183513641357,188
+226800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 97484.15, 'num_steps_sampled': 226800, 'update_time_ms': 2.429, 'num_steps_trained': 226800, 'load_time_ms': 0.609, 'default': {'kl': 0.017055794596672058, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.9029541015625, 'total_loss': 34.64968490600586, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13037782907485962, 'vf_explained_var': 0.975419282913208, 'vf_loss': 34.76279830932617}, 'grad_time_ms': 758.748}",3934253,21712.745859384537,-154.22806128008008,cda-server-6,24,-165.64550611361074,{},4536,10.157.146.6,{},-150.59684708886275,0,1200,2025-08-29_20-38-26,189,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756492706,50.0,226800,21712.745859384537,94.30529642105103,189
+228000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 98561.022, 'num_steps_sampled': 228000, 'update_time_ms': 2.417, 'num_steps_trained': 228000, 'load_time_ms': 0.636, 'default': {'kl': 0.015165035612881184, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.919445037841797, 'total_loss': 50.03436279296875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13474352657794952, 'vf_explained_var': 0.9623463153839111, 'vf_loss': 50.153751373291016}, 'grad_time_ms': 757.404}",3934253,21828.377017736435,-153.72495777307597,cda-server-6,24,-165.64550611361074,{},4560,10.157.146.6,{},-138.56659806083067,0,1200,2025-08-29_20-40-21,190,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756492821,50.0,228000,21828.377017736435,115.6311583518982,190
+229200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 101165.068, 'num_steps_sampled': 229200, 'update_time_ms': 2.484, 'num_steps_trained': 229200, 'load_time_ms': 0.661, 'default': {'kl': 0.015464269556105137, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.737651824951172, 'total_loss': 23.389896392822266, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13375920057296753, 'vf_explained_var': 0.9811168313026428, 'vf_loss': 23.50799560546875}, 'grad_time_ms': 738.619}",3934253,21938.90476822853,-153.30432291500162,cda-server-6,24,-165.64550611361074,{},4584,10.157.146.6,{},-138.56659806083067,0,1200,2025-08-29_20-42-12,191,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756492932,50.0,229200,21938.90476822853,110.52775049209595,191
+230400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 101400.198, 'num_steps_sampled': 230400, 'update_time_ms': 2.456, 'num_steps_trained': 230400, 'load_time_ms': 0.657, 'default': {'kl': 0.01649424433708191, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.909981727600098, 'total_loss': 30.5107421875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1406993716955185, 'vf_explained_var': 0.9759019017219543, 'vf_loss': 30.63473892211914}, 'grad_time_ms': 709.028}",3934253,22045.659630537033,-153.2856807334686,cda-server-6,24,-169.60459859319087,{},4608,10.157.146.6,{},-138.56659806083067,0,1200,2025-08-29_20-43-59,192,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756493039,50.0,230400,22045.659630537033,106.7548623085022,192
+231600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 99165.403, 'num_steps_sampled': 231600, 'update_time_ms': 2.423, 'num_steps_trained': 231600, 'load_time_ms': 0.655, 'default': {'kl': 0.014766073785722256, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.603525161743164, 'total_loss': 46.67988586425781, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12098463624715805, 'vf_explained_var': 0.9633685350418091, 'vf_loss': 46.78591537475586}, 'grad_time_ms': 715.39}",3934253,22140.386114120483,-153.8011387577607,cda-server-6,24,-169.60459859319087,{},4632,10.157.146.6,{},-138.56659806083067,0,1200,2025-08-29_20-45-33,193,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756493133,50.0,231600,22140.386114120483,94.72648358345032,193
+232800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 103001.441, 'num_steps_sampled': 232800, 'update_time_ms': 2.447, 'num_steps_trained': 232800, 'load_time_ms': 0.673, 'default': {'kl': 0.016012491658329964, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.908878326416016, 'total_loss': 31.52902603149414, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12783116102218628, 'vf_explained_var': 0.9771274924278259, 'vf_loss': 31.64064598083496}, 'grad_time_ms': 727.675}",3934253,22253.433773756027,-154.14178573010508,cda-server-6,24,-169.60459859319087,{},4656,10.157.146.6,{},-138.56659806083067,0,1200,2025-08-29_20-47-26,194,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756493246,50.0,232800,22253.433773756027,113.04765963554382,194
+234000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 101708.056, 'num_steps_sampled': 234000, 'update_time_ms': 2.457, 'num_steps_trained': 234000, 'load_time_ms': 0.669, 'default': {'kl': 0.015001552179455757, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.77999496459961, 'total_loss': 35.390708923339844, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12304878234863281, 'vf_explained_var': 0.9735833406448364, 'vf_loss': 35.498565673828125}, 'grad_time_ms': 728.105}",3934253,22365.52901148796,-154.76540725376347,cda-server-6,24,-169.60459859319087,{},4680,10.157.146.6,{},-150.01169480783062,0,1200,2025-08-29_20-49-18,195,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756493358,50.0,234000,22365.52901148796,112.0952377319336,195
+235200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 101804.077, 'num_steps_sampled': 235200, 'update_time_ms': 2.433, 'num_steps_trained': 235200, 'load_time_ms': 0.671, 'default': {'kl': 0.01647140271961689, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.635623931884766, 'total_loss': 52.579586029052734, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13822194933891296, 'vf_explained_var': 0.9653752446174622, 'vf_loss': 52.70112609863281}, 'grad_time_ms': 738.72}",3934253,22445.33472752571,-154.83888058703687,cda-server-6,24,-177.26881957412837,{},4704,10.157.146.6,{},-146.65800145858734,0,1200,2025-08-29_20-50-38,196,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756493438,50.0,235200,22445.33472752571,79.80571603775024,196
+236400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 102468.947, 'num_steps_sampled': 236400, 'update_time_ms': 2.481, 'num_steps_trained': 236400, 'load_time_ms': 0.678, 'default': {'kl': 0.015982117503881454, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.740926742553711, 'total_loss': 18.595247268676758, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12767238914966583, 'vf_explained_var': 0.9844462275505066, 'vf_loss': 18.706737518310547}, 'grad_time_ms': 738.975}",3934253,22552.882929325104,-154.33678146760286,cda-server-6,24,-177.26881957412837,{},4728,10.157.146.6,{},-146.65800145858734,0,1200,2025-08-29_20-52-26,197,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756493546,50.0,236400,22552.882929325104,107.5482017993927,197
+237600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 103790.417, 'num_steps_sampled': 237600, 'update_time_ms': 2.534, 'num_steps_trained': 237600, 'load_time_ms': 0.673, 'default': {'kl': 0.014276721514761448, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.760214805603027, 'total_loss': 41.77705383300781, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1205190122127533, 'vf_explained_var': 0.969501793384552, 'vf_loss': 41.88311767578125}, 'grad_time_ms': 718.444}",3934253,22663.61433315277,-154.44646720506114,cda-server-6,24,-201.16762912816088,{},4752,10.157.146.6,{},-146.65800145858734,0,1200,2025-08-29_20-54-17,198,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756493657,50.0,237600,22663.61433315277,110.73140382766724,198
+238800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 104968.257, 'num_steps_sampled': 238800, 'update_time_ms': 2.498, 'num_steps_trained': 238800, 'load_time_ms': 0.673, 'default': {'kl': 0.01621420495212078, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.574341773986816, 'total_loss': 46.5653076171875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13176356256008148, 'vf_explained_var': 0.9670212268829346, 'vf_loss': 46.68064880371094}, 'grad_time_ms': 720.256}",3934253,22769.716091156006,-154.59433723746173,cda-server-6,24,-201.16762912816088,{},4776,10.157.146.6,{},-146.65800145858734,0,1200,2025-08-29_20-56-03,199,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756493763,50.0,238800,22769.716091156006,106.10175800323486,199
+240000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 105216.055, 'num_steps_sampled': 240000, 'update_time_ms': 2.462, 'num_steps_trained': 240000, 'load_time_ms': 0.641, 'default': {'kl': 0.014780566096305847, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.378012657165527, 'total_loss': 36.66807174682617, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14128637313842773, 'vf_explained_var': 0.9747660756111145, 'vf_loss': 36.79439163208008}, 'grad_time_ms': 725.314}",3934253,22887.873666524887,-154.4375084794087,cda-server-6,24,-201.16762912816088,{},4800,10.157.146.6,{},-149.0978238513307,0,1200,2025-08-29_20-58-01,200,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756493881,50.0,240000,22887.873666524887,118.15757536888123,200
+241200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 104910.777, 'num_steps_sampled': 241200, 'update_time_ms': 2.452, 'num_steps_trained': 241200, 'load_time_ms': 0.606, 'default': {'kl': 0.013666907325387001, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.640439987182617, 'total_loss': 49.74296188354492, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12500447034835815, 'vf_explained_var': 0.9647335410118103, 'vf_loss': 49.85413360595703}, 'grad_time_ms': 735.828}",3934253,22995.453704595566,-154.5080193255974,cda-server-6,24,-201.16762912816088,{},4824,10.157.146.6,{},-146.52584462153817,0,1200,2025-08-29_20-59-48,201,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756493988,50.0,241200,22995.453704595566,107.58003807067871,201
+242400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 102243.239, 'num_steps_sampled': 242400, 'update_time_ms': 2.469, 'num_steps_trained': 242400, 'load_time_ms': 0.604, 'default': {'kl': 0.015573102980852127, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.60585880279541, 'total_loss': 44.72560501098633, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11865009367465973, 'vf_explained_var': 0.9653467535972595, 'vf_loss': 44.828487396240234}, 'grad_time_ms': 732.415}",3934253,23075.499824762344,-154.390515036229,cda-server-6,24,-201.16762912816088,{},4848,10.157.146.6,{},-144.64655797683017,0,1200,2025-08-29_21-01-09,202,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756494069,50.0,242400,23075.499824762344,80.04612016677856,202
+243600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 103775.09, 'num_steps_sampled': 243600, 'update_time_ms': 2.511, 'num_steps_trained': 243600, 'load_time_ms': 0.607, 'default': {'kl': 0.01503191888332367, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.590577125549316, 'total_loss': 42.913047790527344, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11701390147209167, 'vf_explained_var': 0.9663113355636597, 'vf_loss': 43.01484298706055}, 'grad_time_ms': 733.825}",3934253,23185.55954527855,-153.60317569799324,cda-server-6,24,-165.37361769890444,{},4872,10.157.146.6,{},-138.32310226038112,0,1200,2025-08-29_21-02-59,203,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756494179,50.0,243600,23185.55954527855,110.05972051620483,203
+244800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 102549.786, 'num_steps_sampled': 244800, 'update_time_ms': 2.504, 'num_steps_trained': 244800, 'load_time_ms': 0.591, 'default': {'kl': 0.016744563356041908, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.430876731872559, 'total_loss': 37.94887924194336, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13519436120986938, 'vf_explained_var': 0.9705941677093506, 'vf_loss': 38.06712341308594}, 'grad_time_ms': 721.255}",3934253,23286.228005886078,-153.52169316652558,cda-server-6,24,-168.51605431528077,{},4896,10.157.146.6,{},-138.32310226038112,0,1200,2025-08-29_21-04-39,204,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756494279,50.0,244800,23286.228005886078,100.66846060752869,204
+246000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 101212.226, 'num_steps_sampled': 246000, 'update_time_ms': 2.459, 'num_steps_trained': 246000, 'load_time_ms': 0.596, 'default': {'kl': 0.01710333861410618, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.417792320251465, 'total_loss': 38.24384689331055, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.144325852394104, 'vf_explained_var': 0.9721401929855347, 'vf_loss': 38.37085723876953}, 'grad_time_ms': 737.0}",3934253,23385.1042368412,-153.58170670594885,cda-server-6,24,-168.51605431528077,{},4920,10.157.146.6,{},-138.32310226038112,0,1200,2025-08-29_21-06-18,205,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756494378,50.0,246000,23385.1042368412,98.8762309551239,205
+247200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 103806.858, 'num_steps_sampled': 247200, 'update_time_ms': 2.565, 'num_steps_trained': 247200, 'load_time_ms': 0.614, 'default': {'kl': 0.015271955169737339, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.395512580871582, 'total_loss': 54.9863166809082, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14978620409965515, 'vf_explained_var': 0.9647194743156433, 'vf_loss': 55.12063980102539}, 'grad_time_ms': 729.754}",3934253,23490.784667491913,-153.93582146379998,cda-server-6,24,-168.51605431528077,{},4944,10.157.146.6,{},-138.32310226038112,0,1200,2025-08-29_21-08-04,206,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756494484,50.0,247200,23490.784667491913,105.68043065071106,206
+248400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 103302.599, 'num_steps_sampled': 248400, 'update_time_ms': 2.567, 'num_steps_trained': 248400, 'load_time_ms': 0.611, 'default': {'kl': 0.013181351125240326, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.397079467773438, 'total_loss': 47.21424865722656, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1278305947780609, 'vf_explained_var': 0.9659013748168945, 'vf_loss': 47.328731536865234}, 'grad_time_ms': 724.667}",3934253,23593.239156246185,-154.25386999328757,cda-server-6,24,-181.2020651411598,{},4968,10.157.146.6,{},-140.24452928526324,0,1200,2025-08-29_21-09-46,207,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756494586,50.0,248400,23593.239156246185,102.45448875427246,207
+249600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 102177.445, 'num_steps_sampled': 249600, 'update_time_ms': 2.522, 'num_steps_trained': 249600, 'load_time_ms': 0.621, 'default': {'kl': 0.014789672568440437, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.347824096679688, 'total_loss': 56.15548324584961, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.139645516872406, 'vf_explained_var': 0.9593076705932617, 'vf_loss': 56.2801513671875}, 'grad_time_ms': 736.067}",3934253,23692.833278894424,-154.4865686886029,cda-server-6,24,-181.2020651411598,{},4992,10.157.146.6,{},-146.07667147403822,0,1200,2025-08-29_21-11-26,208,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756494686,50.0,249600,23692.833278894424,99.59412264823914,208
+250800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 101683.031, 'num_steps_sampled': 250800, 'update_time_ms': 2.546, 'num_steps_trained': 250800, 'load_time_ms': 0.626, 'default': {'kl': 0.015958771109580994, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.283158302307129, 'total_loss': 32.21907424926758, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14139731228351593, 'vf_explained_var': 0.9757466912269592, 'vf_loss': 32.34431457519531}, 'grad_time_ms': 744.585}",3934253,23794.076202869415,-154.57491315828824,cda-server-6,24,-181.2020651411598,{},5016,10.157.146.6,{},-146.07667147403822,0,1200,2025-08-29_21-13-07,209,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756494787,50.0,250800,23794.076202869415,101.24292397499084,209
+252000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 100320.453, 'num_steps_sampled': 252000, 'update_time_ms': 2.589, 'num_steps_trained': 252000, 'load_time_ms': 0.627, 'default': {'kl': 0.016961511224508286, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.437080383300781, 'total_loss': 28.432422637939453, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1382811963558197, 'vf_explained_var': 0.9765098094940186, 'vf_loss': 28.55352783203125}, 'grad_time_ms': 750.972}",3934253,23898.673114538193,-154.64418535655625,cda-server-6,24,-181.2020651411598,{},5040,10.157.146.6,{},-144.40869175206473,0,1200,2025-08-29_21-14-52,210,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756494892,50.0,252000,23898.673114538193,104.59691166877747,210
+253200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 101961.698, 'num_steps_sampled': 253200, 'update_time_ms': 2.533, 'num_steps_trained': 253200, 'load_time_ms': 0.649, 'default': {'kl': 0.015320269390940666, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.378397941589355, 'total_loss': 38.70890808105469, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13138148188591003, 'vf_explained_var': 0.9688937067985535, 'vf_loss': 38.82477951049805}, 'grad_time_ms': 745.729}",3934253,24022.612620592117,-154.84949540505792,cda-server-6,24,-177.23204123604674,{},5064,10.157.146.6,{},-144.40869175206473,0,1200,2025-08-29_21-16-56,211,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756495016,50.0,253200,24022.612620592117,123.93950605392456,211
+254400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 104314.751, 'num_steps_sampled': 254400, 'update_time_ms': 2.506, 'num_steps_trained': 254400, 'load_time_ms': 0.649, 'default': {'kl': 0.015276423655450344, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.541495323181152, 'total_loss': 35.86064910888672, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.135739266872406, 'vf_explained_var': 0.9777436852455139, 'vf_loss': 35.980918884277344}, 'grad_time_ms': 759.486}",3934253,24126.327362060547,-154.75495105972402,cda-server-6,24,-175.558753189674,{},5088,10.157.146.6,{},-142.65037420939933,0,1200,2025-08-29_21-18-40,212,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756495120,50.0,254400,24126.327362060547,103.71474146842957,212
+255600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 106540.291, 'num_steps_sampled': 255600, 'update_time_ms': 2.506, 'num_steps_trained': 255600, 'load_time_ms': 0.658, 'default': {'kl': 0.014562960714101791, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.229193687438965, 'total_loss': 27.504392623901367, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12833461165428162, 'vf_explained_var': 0.9786010384559631, 'vf_loss': 27.617982864379883}, 'grad_time_ms': 736.482}",3934253,24258.412103414536,-154.48851868906385,cda-server-6,24,-175.558753189674,{},5112,10.157.146.6,{},-142.65037420939933,0,1200,2025-08-29_21-20-52,213,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756495252,50.0,255600,24258.412103414536,132.08474135398865,213
+256800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 105849.683, 'num_steps_sampled': 256800, 'update_time_ms': 2.509, 'num_steps_trained': 256800, 'load_time_ms': 0.659, 'default': {'kl': 0.014660666696727276, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.363061904907227, 'total_loss': 40.02897644042969, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13153332471847534, 'vf_explained_var': 0.9764943718910217, 'vf_loss': 40.14567184448242}, 'grad_time_ms': 728.893}",3934253,24352.10121202469,-154.46203690448567,cda-server-6,24,-175.558753189674,{},5136,10.157.146.6,{},-142.65037420939933,0,1200,2025-08-29_21-22-25,214,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756495345,50.0,256800,24352.10121202469,93.6891086101532,214
+258000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 106030.498, 'num_steps_sampled': 258000, 'update_time_ms': 2.55, 'num_steps_trained': 258000, 'load_time_ms': 0.658, 'default': {'kl': 0.015574107877910137, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.24034595489502, 'total_loss': 23.518882751464844, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13245254755020142, 'vf_explained_var': 0.9813408255577087, 'vf_loss': 23.63556671142578}, 'grad_time_ms': 724.611}",3934253,24452.742853164673,-153.56616856118634,cda-server-6,24,-175.558753189674,{},5160,10.157.146.6,{},-142.65037420939933,0,1200,2025-08-29_21-24-06,215,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756495446,50.0,258000,24452.742853164673,100.64164113998413,215
+259200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 106625.713, 'num_steps_sampled': 259200, 'update_time_ms': 2.448, 'num_steps_trained': 259200, 'load_time_ms': 0.639, 'default': {'kl': 0.015010246075689793, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.155643463134766, 'total_loss': 42.44038009643555, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12642936408519745, 'vf_explained_var': 0.9697035551071167, 'vf_loss': 42.55160903930664}, 'grad_time_ms': 724.094}",3934253,24564.368947267532,-154.02823510503526,cda-server-6,24,-178.46962133035237,{},5184,10.157.146.6,{},-142.65037420939933,0,1200,2025-08-29_21-25-58,216,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756495558,50.0,259200,24564.368947267532,111.6260941028595,216
+260400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 106940.569, 'num_steps_sampled': 260400, 'update_time_ms': 2.416, 'num_steps_trained': 260400, 'load_time_ms': 0.636, 'default': {'kl': 0.015397397801280022, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.084989547729492, 'total_loss': 37.89418029785156, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1234256848692894, 'vf_explained_var': 0.9787766933441162, 'vf_loss': 38.002017974853516}, 'grad_time_ms': 719.41}",3934253,24669.925053358078,-154.47670628352498,cda-server-6,24,-178.46962133035237,{},5208,10.157.146.6,{},-144.31778136586442,0,1200,2025-08-29_21-27-43,217,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756495663,50.0,260400,24669.925053358078,105.55610609054565,217
+261600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 107460.8, 'num_steps_sampled': 261600, 'update_time_ms': 2.437, 'num_steps_trained': 261600, 'load_time_ms': 0.636, 'default': {'kl': 0.015226908959448338, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.149221420288086, 'total_loss': 47.839778900146484, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13463148474693298, 'vf_explained_var': 0.9676254987716675, 'vf_loss': 47.95899200439453}, 'grad_time_ms': 716.797}",3934253,24774.695830106735,-154.79985125114234,cda-server-6,24,-178.46962133035237,{},5232,10.157.146.6,{},-143.7615888181636,0,1200,2025-08-29_21-29-28,218,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756495768,50.0,261600,24774.695830106735,104.77077674865723,218
+262800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 109240.82, 'num_steps_sampled': 262800, 'update_time_ms': 2.408, 'num_steps_trained': 262800, 'load_time_ms': 0.629, 'default': {'kl': 0.013625938445329666, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.160884857177734, 'total_loss': 38.04711151123047, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12461742758750916, 'vf_explained_var': 0.9737904667854309, 'vf_loss': 38.15793228149414}, 'grad_time_ms': 707.953}",3934253,24893.649383544922,-154.8291380370024,cda-server-6,24,-178.46962133035237,{},5256,10.157.146.6,{},-143.7615888181636,0,1200,2025-08-29_21-31-27,219,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756495887,50.0,262800,24893.649383544922,118.95355343818665,219
+264000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 106807.186, 'num_steps_sampled': 264000, 'update_time_ms': 2.388, 'num_steps_trained': 264000, 'load_time_ms': 0.63, 'default': {'kl': 0.01404307596385479, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.15298080444336, 'total_loss': 49.057411193847656, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13220664858818054, 'vf_explained_var': 0.965358316898346, 'vf_loss': 49.17539978027344}, 'grad_time_ms': 709.126}",3934253,24973.92138981819,-154.6194946052812,cda-server-6,24,-172.94731992277121,{},5280,10.157.146.6,{},-143.7615888181636,0,1200,2025-08-29_21-32-47,220,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756495967,50.0,264000,24973.92138981819,80.27200627326965,220
+265200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 100237.639, 'num_steps_sampled': 265200, 'update_time_ms': 2.377, 'num_steps_trained': 265200, 'load_time_ms': 0.616, 'default': {'kl': 0.014865408651530743, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.083892822265625, 'total_loss': 21.751710891723633, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13358891010284424, 'vf_explained_var': 0.9818400144577026, 'vf_loss': 21.870248794555664}, 'grad_time_ms': 722.184}",3934253,25032.295438051224,-154.0848343995392,cda-server-6,24,-172.94731992277121,{},5304,10.157.146.6,{},-143.7615888181636,0,1200,2025-08-29_21-33-46,221,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756496026,50.0,265200,25032.295438051224,58.37404823303223,221
+266400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 102143.166, 'num_steps_sampled': 266400, 'update_time_ms': 2.42, 'num_steps_trained': 266400, 'load_time_ms': 0.617, 'default': {'kl': 0.014355365186929703, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.014341354370117, 'total_loss': 67.55355072021484, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13376568257808685, 'vf_explained_var': 0.9518985748291016, 'vf_loss': 67.67278289794922}, 'grad_time_ms': 737.906}",3934253,25155.22252869606,-154.03806347040836,cda-server-6,24,-174.09409334392393,{},5328,10.157.146.6,{},-145.41766044712392,0,1200,2025-08-29_21-35-49,222,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756496149,50.0,266400,25155.22252869606,122.92709064483643,222
+267600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 99729.598, 'num_steps_sampled': 267600, 'update_time_ms': 2.413, 'num_steps_trained': 267600, 'load_time_ms': 0.605, 'default': {'kl': 0.015129496343433857, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.16669750213623, 'total_loss': 23.677480697631836, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13773919641971588, 'vf_explained_var': 0.9821985960006714, 'vf_loss': 23.79990005493164}, 'grad_time_ms': 742.503}",3934253,25263.21758890152,-153.84540569107764,cda-server-6,24,-174.09409334392393,{},5352,10.157.146.6,{},-145.41766044712392,0,1200,2025-08-29_21-37-37,223,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756496257,50.0,267600,25263.21758890152,107.9950602054596,223
+268800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 99750.405, 'num_steps_sampled': 268800, 'update_time_ms': 2.491, 'num_steps_trained': 268800, 'load_time_ms': 0.613, 'default': {'kl': 0.014904823154211044, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.1945161819458, 'total_loss': 19.52242088317871, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14050191640853882, 'vf_explained_var': 0.9843916296958923, 'vf_loss': 19.647830963134766}, 'grad_time_ms': 760.738}",3934253,25357.295568943024,-153.5902486105291,cda-server-6,24,-175.99024313429615,{},5376,10.157.146.6,{},-145.11019265055916,0,1200,2025-08-29_21-39-11,224,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756496351,50.0,268800,25357.295568943024,94.0779800415039,224
+270000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 100893.525, 'num_steps_sampled': 270000, 'update_time_ms': 2.487, 'num_steps_trained': 270000, 'load_time_ms': 0.617, 'default': {'kl': 0.015589192509651184, 'cur_lr': 4.999999873689376e-05, 'entropy': 12.922922134399414, 'total_loss': 29.725852966308594, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13020819425582886, 'vf_explained_var': 0.9754431247711182, 'vf_loss': 29.84027862548828}, 'grad_time_ms': 755.303}",3934253,25469.313912391663,-153.95590331783544,cda-server-6,24,-175.99024313429615,{},5400,10.157.146.6,{},-144.9118933600018,0,1200,2025-08-29_21-41-03,225,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756496463,50.0,270000,25469.313912391663,112.01834344863892,225
+271200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 100926.159, 'num_steps_sampled': 271200, 'update_time_ms': 2.492, 'num_steps_trained': 271200, 'load_time_ms': 0.614, 'default': {'kl': 0.01505206897854805, 'cur_lr': 4.999999873689376e-05, 'entropy': 12.80807876586914, 'total_loss': 23.48046112060547, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13089901208877563, 'vf_explained_var': 0.980962872505188, 'vf_loss': 23.596118927001953}, 'grad_time_ms': 754.195}",3934253,25581.256008148193,-153.83885704993364,cda-server-6,24,-175.99024313429615,{},5424,10.157.146.6,{},-144.9118933600018,0,1200,2025-08-29_21-42-55,226,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756496575,50.0,271200,25581.256008148193,111.94209575653076,226
+272400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 100114.556, 'num_steps_sampled': 272400, 'update_time_ms': 2.515, 'num_steps_trained': 272400, 'load_time_ms': 0.613, 'default': {'kl': 0.015062487684190273, 'cur_lr': 4.999999873689376e-05, 'entropy': 12.996514320373535, 'total_loss': 24.47572898864746, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1414356827735901, 'vf_explained_var': 0.9804410934448242, 'vf_loss': 24.601913452148438}, 'grad_time_ms': 755.148}",3934253,25678.705996513367,-154.2308098880995,cda-server-6,24,-175.99024313429615,{},5448,10.157.146.6,{},-144.9118933600018,0,1200,2025-08-29_21-44-32,227,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756496672,50.0,272400,25678.705996513367,97.44998836517334,227
+273600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 97764.171, 'num_steps_sampled': 273600, 'update_time_ms': 2.513, 'num_steps_trained': 273600, 'load_time_ms': 0.605, 'default': {'kl': 0.014948169700801373, 'cur_lr': 4.999999873689376e-05, 'entropy': 12.985085487365723, 'total_loss': 51.06395721435547, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14840246737003326, 'vf_explained_var': 0.9613332748413086, 'vf_loss': 51.197227478027344}, 'grad_time_ms': 767.314}",3934253,25760.094562768936,-154.3562061237597,cda-server-6,24,-172.89268666728373,{},5472,10.157.146.6,{},-144.9118933600018,0,1200,2025-08-29_21-45-54,228,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756496754,50.0,273600,25760.094562768936,81.38856625556946,228
+274800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95601.535, 'num_steps_sampled': 274800, 'update_time_ms': 2.6, 'num_steps_trained': 274800, 'load_time_ms': 0.608, 'default': {'kl': 0.014542028307914734, 'cur_lr': 4.999999873689376e-05, 'entropy': 13.030766487121582, 'total_loss': 61.48118591308594, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13130980730056763, 'vf_explained_var': 0.9557677507400513, 'vf_loss': 61.5977783203125}, 'grad_time_ms': 769.226}",3934253,25857.44341278076,-155.07597284154912,cda-server-6,24,-191.1136767254141,{},5496,10.157.146.6,{},-148.96356347694825,0,1200,2025-08-29_21-47-31,229,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756496851,50.0,274800,25857.44341278076,97.34885001182556,229
+276000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 96856.808, 'num_steps_sampled': 276000, 'update_time_ms': 2.746, 'num_steps_trained': 276000, 'load_time_ms': 0.616, 'default': {'kl': 0.01313636265695095, 'cur_lr': 4.999999873689376e-05, 'entropy': 12.705184936523438, 'total_loss': 44.72980499267578, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12996193766593933, 'vf_explained_var': 0.966231644153595, 'vf_loss': 44.84646987915039}, 'grad_time_ms': 764.339}",3934253,25950.22126197815,-155.30187061257263,cda-server-6,24,-191.1136767254141,{},5520,10.157.146.6,{},-148.96356347694825,0,1200,2025-08-29_21-49-04,230,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756496944,50.0,276000,25950.22126197815,92.7778491973877,230
+277200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 101806.465, 'num_steps_sampled': 277200, 'update_time_ms': 2.732, 'num_steps_trained': 277200, 'load_time_ms': 0.619, 'default': {'kl': 0.016049357131123543, 'cur_lr': 4.999999873689376e-05, 'entropy': 12.747896194458008, 'total_loss': 39.389190673828125, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1357201188802719, 'vf_explained_var': 0.9743813276290894, 'vf_loss': 39.508663177490234}, 'grad_time_ms': 767.366}",3934253,26058.122532606125,-154.81526937116584,cda-server-6,24,-191.1136767254141,{},5544,10.157.146.6,{},-141.56158667514845,0,1200,2025-08-29_21-50-52,231,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756497052,50.0,277200,26058.122532606125,107.90127062797546,231
+278400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 99243.229, 'num_steps_sampled': 278400, 'update_time_ms': 2.714, 'num_steps_trained': 278400, 'load_time_ms': 0.616, 'default': {'kl': 0.014159131795167923, 'cur_lr': 4.999999873689376e-05, 'entropy': 12.914978981018066, 'total_loss': 33.23030471801758, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14277423918247223, 'vf_explained_var': 0.9747536182403564, 'vf_loss': 33.35874557495117}, 'grad_time_ms': 758.329}",3934253,26155.325921297073,-154.74883742173165,cda-server-6,24,-191.1136767254141,{},5568,10.157.146.6,{},-137.5857586828239,0,1200,2025-08-29_21-52-29,232,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756497149,50.0,278400,26155.325921297073,97.20338869094849,232
+279600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 99070.339, 'num_steps_sampled': 279600, 'update_time_ms': 2.659, 'num_steps_trained': 279600, 'load_time_ms': 0.619, 'default': {'kl': 0.014392811805009842, 'cur_lr': 4.999999873689376e-05, 'entropy': 12.926675796508789, 'total_loss': 25.39544105529785, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13040010631084442, 'vf_explained_var': 0.9801141023635864, 'vf_loss': 25.511268615722656}, 'grad_time_ms': 766.947}",3934253,26261.67698597908,-154.05910708740407,cda-server-6,24,-169.70926420317127,{},5592,10.157.146.6,{},-137.5857586828239,0,1200,2025-08-29_21-54-15,233,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756497255,50.0,279600,26261.67698597908,106.35106468200684,233
+280800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 99423.125, 'num_steps_sampled': 280800, 'update_time_ms': 2.548, 'num_steps_trained': 280800, 'load_time_ms': 0.608, 'default': {'kl': 0.015516189858317375, 'cur_lr': 4.999999873689376e-05, 'entropy': 12.509271621704102, 'total_loss': 37.31974792480469, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13313306868076324, 'vf_explained_var': 0.9727855324745178, 'vf_loss': 37.437171936035156}, 'grad_time_ms': 756.131}",3934253,26359.173065185547,-154.56187542044893,cda-server-6,24,-175.45024040060775,{},5616,10.157.146.6,{},-137.5857586828239,0,1200,2025-08-29_21-55-53,234,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756497353,50.0,280800,26359.173065185547,97.49607920646667,234
+282000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 99400.701, 'num_steps_sampled': 282000, 'update_time_ms': 2.538, 'num_steps_trained': 282000, 'load_time_ms': 0.61, 'default': {'kl': 0.015087624080479145, 'cur_lr': 4.999999873689376e-05, 'entropy': 12.51517105102539, 'total_loss': 36.4234619140625, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1278027892112732, 'vf_explained_var': 0.9724928736686707, 'vf_loss': 36.53599548339844}, 'grad_time_ms': 757.099}",3934253,26470.97898197174,-154.6107954352704,cda-server-6,24,-175.45024040060775,{},5640,10.157.146.6,{},-137.5857586828239,0,1200,2025-08-29_21-57-45,235,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756497465,50.0,282000,26470.97898197174,111.80591678619385,235
+283200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95406.385, 'num_steps_sampled': 283200, 'update_time_ms': 2.563, 'num_steps_trained': 283200, 'load_time_ms': 0.647, 'default': {'kl': 0.015108389779925346, 'cur_lr': 4.999999873689376e-05, 'entropy': 12.653817176818848, 'total_loss': 48.8950309753418, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13136011362075806, 'vf_explained_var': 0.9658221006393433, 'vf_loss': 49.01109313964844}, 'grad_time_ms': 743.824}",3934253,26542.84624195099,-154.18766838139035,cda-server-6,24,-175.45024040060775,{},5664,10.157.146.6,{},-147.4771196656932,0,1200,2025-08-29_21-58-56,236,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756497536,50.0,283200,26542.84624195099,71.86725997924805,236
+284400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94211.608, 'num_steps_sampled': 284400, 'update_time_ms': 2.569, 'num_steps_trained': 284400, 'load_time_ms': 0.657, 'default': {'kl': 0.014272380620241165, 'cur_lr': 4.999999873689376e-05, 'entropy': 12.560246467590332, 'total_loss': 31.587806701660156, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1402655392885208, 'vf_explained_var': 0.9775816798210144, 'vf_loss': 31.713619232177734}, 'grad_time_ms': 752.183}",3934253,26628.431704998016,-154.84538605775754,cda-server-6,24,-175.45024040060775,{},5688,10.157.146.6,{},-147.4771196656932,0,1200,2025-08-29_22-00-22,237,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756497622,50.0,284400,26628.431704998016,85.58546304702759,237
+285600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93826.904, 'num_steps_sampled': 285600, 'update_time_ms': 2.608, 'num_steps_trained': 285600, 'load_time_ms': 0.661, 'default': {'kl': 0.015821723267436028, 'cur_lr': 4.999999873689376e-05, 'entropy': 12.670042037963867, 'total_loss': 30.40340232849121, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12687571346759796, 'vf_explained_var': 0.9778980612754822, 'vf_loss': 30.514259338378906}, 'grad_time_ms': 749.683}",3934253,26705.948573827744,-154.4982256142866,cda-server-6,24,-172.66039303845443,{},5712,10.157.146.6,{},-142.12198176583468,0,1200,2025-08-29_22-01-40,238,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756497700,50.0,285600,26705.948573827744,77.51686882972717,238
+286800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94663.624, 'num_steps_sampled': 286800, 'update_time_ms': 2.587, 'num_steps_trained': 286800, 'load_time_ms': 0.671, 'default': {'kl': 0.01557975821197033, 'cur_lr': 4.999999873689376e-05, 'entropy': 12.703690528869629, 'total_loss': 32.40293502807617, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.139328271150589, 'vf_explained_var': 0.9742265343666077, 'vf_loss': 32.5264892578125}, 'grad_time_ms': 743.241}",3934253,26811.59946990013,-154.7133937321576,cda-server-6,24,-172.66039303845443,{},5736,10.157.146.6,{},-142.12198176583468,0,1200,2025-08-29_22-03-25,239,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756497805,50.0,286800,26811.59946990013,105.6508960723877,239
+288000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95221.891, 'num_steps_sampled': 288000, 'update_time_ms': 2.448, 'num_steps_trained': 288000, 'load_time_ms': 0.667, 'default': {'kl': 0.015021582134068012, 'cur_lr': 4.999999873689376e-05, 'entropy': 12.68139362335205, 'total_loss': 51.14398956298828, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12773293256759644, 'vf_explained_var': 0.9638887047767639, 'vf_loss': 51.25651550292969}, 'grad_time_ms': 739.327}",3934253,26909.918827056885,-155.13402580695703,cda-server-6,24,-174.34265858004116,{},5760,10.157.146.6,{},-142.12198176583468,0,1200,2025-08-29_22-05-04,240,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756497904,50.0,288000,26909.918827056885,98.31935715675354,240
+289200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95494.256, 'num_steps_sampled': 289200, 'update_time_ms': 2.609, 'num_steps_trained': 289200, 'load_time_ms': 0.661, 'default': {'kl': 0.01462772861123085, 'cur_lr': 4.999999873689376e-05, 'entropy': 12.194981575012207, 'total_loss': 20.053916931152344, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1390572488307953, 'vf_explained_var': 0.9844868779182434, 'vf_loss': 20.17816162109375}, 'grad_time_ms': 731.619}",3934253,27020.467235326767,-154.3404811106415,cda-server-6,24,-174.34265858004116,{},5784,10.157.146.6,{},-142.12198176583468,0,1200,2025-08-29_22-06-54,241,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756498014,50.0,289200,27020.467235326767,110.5484082698822,241
+290400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95563.006, 'num_steps_sampled': 290400, 'update_time_ms': 2.582, 'num_steps_trained': 290400, 'load_time_ms': 0.67, 'default': {'kl': 0.016566181555390358, 'cur_lr': 4.999999873689376e-05, 'entropy': 12.55049991607666, 'total_loss': 25.23848533630371, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13654492795467377, 'vf_explained_var': 0.9805251359939575, 'vf_loss': 25.35825538635254}, 'grad_time_ms': 724.998}",3934253,27118.29235434532,-154.16136676098563,cda-server-6,24,-174.34265858004116,{},5808,10.157.146.6,{},-148.8217025152694,0,1200,2025-08-29_22-08-32,242,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756498112,50.0,290400,27118.29235434532,97.82511901855469,242
+291600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94718.135, 'num_steps_sampled': 291600, 'update_time_ms': 2.636, 'num_steps_trained': 291600, 'load_time_ms': 0.67, 'default': {'kl': 0.014852155931293964, 'cur_lr': 4.999999873689376e-05, 'entropy': 12.296875953674316, 'total_loss': 28.0606689453125, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13112103939056396, 'vf_explained_var': 0.9789355397224426, 'vf_loss': 28.1767520904541}, 'grad_time_ms': 734.402}",3934253,27216.289939165115,-154.16419404181408,cda-server-6,24,-174.34265858004116,{},5832,10.157.146.6,{},-144.2930427633367,0,1200,2025-08-29_22-10-10,243,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756498210,50.0,291600,27216.289939165115,97.9975848197937,243
+292800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94072.851, 'num_steps_sampled': 292800, 'update_time_ms': 2.692, 'num_steps_trained': 292800, 'load_time_ms': 0.678, 'default': {'kl': 0.01521742157638073, 'cur_lr': 4.999999873689376e-05, 'entropy': 12.436162948608398, 'total_loss': 32.87732696533203, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1397152990102768, 'vf_explained_var': 0.974628746509552, 'vf_loss': 33.00163269042969}, 'grad_time_ms': 729.856}",3934253,27307.288112401962,-154.1744086331289,cda-server-6,24,-173.09618343276952,{},5856,10.157.146.6,{},-144.2930427633367,0,1200,2025-08-29_22-11-41,244,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756498301,50.0,292800,27307.288112401962,90.99817323684692,244
+294000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92770.585, 'num_steps_sampled': 294000, 'update_time_ms': 2.66, 'num_steps_trained': 294000, 'load_time_ms': 0.669, 'default': {'kl': 0.015452582389116287, 'cur_lr': 4.999999873689376e-05, 'entropy': 12.431663513183594, 'total_loss': 28.786949157714844, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12592917680740356, 'vf_explained_var': 0.9775936603546143, 'vf_loss': 28.89723777770996}, 'grad_time_ms': 734.721}",3934253,27406.1181910038,-154.34016486305367,cda-server-6,24,-173.09618343276952,{},5880,10.157.146.6,{},-144.2930427633367,0,1200,2025-08-29_22-13-20,245,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756498400,50.0,294000,27406.1181910038,98.83007860183716,245
+295200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95930.382, 'num_steps_sampled': 295200, 'update_time_ms': 2.688, 'num_steps_trained': 295200, 'load_time_ms': 0.632, 'default': {'kl': 0.014374022372066975, 'cur_lr': 4.999999873689376e-05, 'entropy': 12.280024528503418, 'total_loss': 37.74338912963867, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11980906873941422, 'vf_explained_var': 0.9736410975456238, 'vf_loss': 37.8486442565918}, 'grad_time_ms': 747.384}",3934253,27509.708899497986,-154.27605406898746,cda-server-6,24,-173.09618343276952,{},5904,10.157.146.6,{},-144.2930427633367,0,1200,2025-08-29_22-15-03,246,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756498503,50.0,295200,27509.708899497986,103.5907084941864,246
+296400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94837.908, 'num_steps_sampled': 296400, 'update_time_ms': 2.657, 'num_steps_trained': 296400, 'load_time_ms': 0.629, 'default': {'kl': 0.01566668227314949, 'cur_lr': 4.999999873689376e-05, 'entropy': 12.17396068572998, 'total_loss': 28.47240447998047, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12497733533382416, 'vf_explained_var': 0.9772866368293762, 'vf_loss': 28.58152198791504}, 'grad_time_ms': 753.058}",3934253,27584.426176071167,-154.29560239216406,cda-server-6,24,-175.1037563369774,{},5928,10.157.146.6,{},-151.14767500096642,0,1200,2025-08-29_22-16-18,247,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756498578,50.0,296400,27584.426176071167,74.71727657318115,247
+297600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 96071.038, 'num_steps_sampled': 297600, 'update_time_ms': 2.61, 'num_steps_trained': 297600, 'load_time_ms': 0.63, 'default': {'kl': 0.013279435224831104, 'cur_lr': 4.999999873689376e-05, 'entropy': 12.279629707336426, 'total_loss': 42.234100341796875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12782859802246094, 'vf_explained_var': 0.9741078615188599, 'vf_loss': 42.34848403930664}, 'grad_time_ms': 753.748}",3934253,27674.280586481094,-153.9695664898226,cda-server-6,24,-175.1037563369774,{},5952,10.157.146.6,{},-149.32841745117312,0,1200,2025-08-29_22-17-48,248,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756498668,50.0,297600,27674.280586481094,89.85441040992737,248
+298800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 97574.462, 'num_steps_sampled': 298800, 'update_time_ms': 2.616, 'num_steps_trained': 298800, 'load_time_ms': 0.651, 'default': {'kl': 0.01410535629838705, 'cur_lr': 4.999999873689376e-05, 'entropy': 12.100536346435547, 'total_loss': 19.91636085510254, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12572787702083588, 'vf_explained_var': 0.9843169450759888, 'vf_loss': 20.027809143066406}, 'grad_time_ms': 758.636}",3934253,27795.013806581497,-153.71891833490415,cda-server-6,24,-175.1037563369774,{},5976,10.157.146.6,{},-149.32841745117312,0,1200,2025-08-29_22-19-49,249,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756498789,50.0,298800,27795.013806581497,120.73322010040283,249
+300000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 97666.622, 'num_steps_sampled': 300000, 'update_time_ms': 2.658, 'num_steps_trained': 300000, 'load_time_ms': 0.651, 'default': {'kl': 0.012680845335125923, 'cur_lr': 4.999999873689376e-05, 'entropy': 12.221396446228027, 'total_loss': 41.20733642578125, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11321382969617844, 'vf_explained_var': 0.9742316007614136, 'vf_loss': 41.307708740234375}, 'grad_time_ms': 763.965}",3934253,27894.30849289894,-154.03355792080626,cda-server-6,24,-201.81562551481366,{},6000,10.157.146.6,{},-149.32841745117312,0,1200,2025-08-29_22-21-28,250,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756498888,50.0,300000,27894.30849289894,99.29468631744385,250
+301200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94007.579, 'num_steps_sampled': 301200, 'update_time_ms': 2.523, 'num_steps_trained': 301200, 'load_time_ms': 0.647, 'default': {'kl': 0.013563835062086582, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.968669891357422, 'total_loss': 31.392887115478516, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11377114802598953, 'vf_explained_var': 0.9757980704307556, 'vf_loss': 31.492923736572266}, 'grad_time_ms': 744.0}",3934253,27968.066175222397,-153.65693731382558,cda-server-6,24,-201.81562551481366,{},6024,10.157.146.6,{},-149.32841745117312,0,1200,2025-08-29_22-22-42,251,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756498962,50.0,301200,27968.066175222397,73.75768232345581,251
+302400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92510.76, 'num_steps_sampled': 302400, 'update_time_ms': 2.579, 'num_steps_trained': 302400, 'load_time_ms': 0.648, 'default': {'kl': 0.013669435866177082, 'cur_lr': 4.999999873689376e-05, 'entropy': 12.033707618713379, 'total_loss': 17.814746856689453, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11148720234632492, 'vf_explained_var': 0.9851120710372925, 'vf_loss': 17.912391662597656}, 'grad_time_ms': 751.818}",3934253,28051.0013692379,-153.92671987302916,cda-server-6,24,-201.81562551481366,{},6048,10.157.146.6,{},-149.90883747438755,0,1200,2025-08-29_22-24-05,252,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756499045,50.0,302400,28051.0013692379,82.93519401550293,252
+303600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92284.865, 'num_steps_sampled': 303600, 'update_time_ms': 2.535, 'num_steps_trained': 303600, 'load_time_ms': 0.657, 'default': {'kl': 0.01406802423298359, 'cur_lr': 4.999999873689376e-05, 'entropy': 12.27514362335205, 'total_loss': 20.965513229370117, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1320834904909134, 'vf_explained_var': 0.9830424189567566, 'vf_loss': 21.083351135253906}, 'grad_time_ms': 750.709}",3934253,28146.728314638138,-153.97191238060424,cda-server-6,24,-201.81562551481366,{},6072,10.157.146.6,{},-141.37306239201038,0,1200,2025-08-29_22-25-41,253,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756499141,50.0,303600,28146.728314638138,95.72694540023804,253
+304800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 91604.615, 'num_steps_sampled': 304800, 'update_time_ms': 2.51, 'num_steps_trained': 304800, 'load_time_ms': 0.665, 'default': {'kl': 0.014106563292443752, 'cur_lr': 4.999999873689376e-05, 'entropy': 12.195647239685059, 'total_loss': 38.15617752075195, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1277208924293518, 'vf_explained_var': 0.9729253053665161, 'vf_loss': 38.269615173339844}, 'grad_time_ms': 756.57}",3934253,28230.983020067215,-153.40373628066334,cda-server-6,24,-175.32770252462922,{},6096,10.157.146.6,{},-141.37306239201038,0,1200,2025-08-29_22-27-05,254,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756499225,50.0,304800,28230.983020067215,84.25470542907715,254
+306000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 90304.885, 'num_steps_sampled': 306000, 'update_time_ms': 2.523, 'num_steps_trained': 306000, 'load_time_ms': 0.669, 'default': {'kl': 0.014886324293911457, 'cur_lr': 4.999999873689376e-05, 'entropy': 12.2487211227417, 'total_loss': 19.486772537231445, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13814154267311096, 'vf_explained_var': 0.984747052192688, 'vf_loss': 19.609840393066406}, 'grad_time_ms': 749.583}",3934253,28316.745934963226,-153.5950026973953,cda-server-6,24,-175.32770252462922,{},6120,10.157.146.6,{},-141.37306239201038,0,1200,2025-08-29_22-28-31,255,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756499311,50.0,306000,28316.745934963226,85.76291489601135,255
+307200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 87739.019, 'num_steps_sampled': 307200, 'update_time_ms': 2.509, 'num_steps_trained': 307200, 'load_time_ms': 0.67, 'default': {'kl': 0.01419132947921753, 'cur_lr': 4.999999873689376e-05, 'entropy': 12.179950714111328, 'total_loss': 44.02378845214844, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12743514776229858, 'vf_explained_var': 0.9703550338745117, 'vf_loss': 44.1368522644043}, 'grad_time_ms': 743.555}",3934253,28394.618319272995,-153.71739596982954,cda-server-6,24,-175.32770252462922,{},6144,10.157.146.6,{},-141.37306239201038,0,1200,2025-08-29_22-29-49,256,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756499389,50.0,307200,28394.618319272995,77.87238430976868,256
+308400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 89687.698, 'num_steps_sampled': 308400, 'update_time_ms': 2.549, 'num_steps_trained': 308400, 'load_time_ms': 0.664, 'default': {'kl': 0.014530722051858902, 'cur_lr': 4.999999873689376e-05, 'entropy': 12.120244026184082, 'total_loss': 26.150606155395508, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.125518798828125, 'vf_explained_var': 0.9807274341583252, 'vf_loss': 26.261411666870117}, 'grad_time_ms': 747.631}",3934253,28488.863465070724,-153.58796723004997,cda-server-6,24,-175.32770252462922,{},6168,10.157.146.6,{},-141.45347079017628,0,1200,2025-08-29_22-31-23,257,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756499483,50.0,308400,28488.863465070724,94.24514579772949,257
+309600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 89355.613, 'num_steps_sampled': 309600, 'update_time_ms': 2.538, 'num_steps_trained': 309600, 'load_time_ms': 0.658, 'default': {'kl': 0.014274870045483112, 'cur_lr': 4.999999873689376e-05, 'entropy': 12.029433250427246, 'total_loss': 21.19289779663086, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14656893908977509, 'vf_explained_var': 0.9849755764007568, 'vf_loss': 21.325014114379883}, 'grad_time_ms': 743.267}",3934253,28575.35304093361,-153.65874506459247,cda-server-6,24,-167.30001100256214,{},6192,10.157.146.6,{},-141.45347079017628,0,1200,2025-08-29_22-32-49,258,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756499569,50.0,309600,28575.35304093361,86.48957586288452,258
+310800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 87597.555, 'num_steps_sampled': 310800, 'update_time_ms': 2.485, 'num_steps_trained': 310800, 'load_time_ms': 0.628, 'default': {'kl': 0.01563265360891819, 'cur_lr': 4.999999873689376e-05, 'entropy': 12.04366397857666, 'total_loss': 24.3173885345459, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13971100747585297, 'vf_explained_var': 0.9832693934440613, 'vf_loss': 24.441268920898438}, 'grad_time_ms': 740.667}",3934253,28678.47874569893,-153.58450695244758,cda-server-6,24,-167.7332204385451,{},6216,10.157.146.6,{},-141.45347079017628,0,1200,2025-08-29_22-34-32,259,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756499672,50.0,310800,28678.47874569893,103.12570476531982,259
+312000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 87057.199, 'num_steps_sampled': 312000, 'update_time_ms': 2.422, 'num_steps_trained': 312000, 'load_time_ms': 0.632, 'default': {'kl': 0.013417969457805157, 'cur_lr': 4.999999873689376e-05, 'entropy': 12.221627235412598, 'total_loss': 37.70539474487305, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12108760327100754, 'vf_explained_var': 0.9734055995941162, 'vf_loss': 37.81289291381836}, 'grad_time_ms': 740.404}",3934253,28772.3668551445,-153.13736615788864,cda-server-6,24,-170.00570466065776,{},6240,10.157.146.6,{},-140.7502885744889,0,1200,2025-08-29_22-36-06,260,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756499766,50.0,312000,28772.3668551445,93.8881094455719,260
+313200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 89045.252, 'num_steps_sampled': 313200, 'update_time_ms': 2.386, 'num_steps_trained': 313200, 'load_time_ms': 0.631, 'default': {'kl': 0.012220478616654873, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.781728744506836, 'total_loss': 74.40132904052734, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11190219968557358, 'vf_explained_var': 0.9513096213340759, 'vf_loss': 74.50086212158203}, 'grad_time_ms': 762.39}",3934253,28866.223863124847,-153.94081905848125,cda-server-6,24,-195.83850086707832,{},6264,10.157.146.6,{},-140.7502885744889,0,1200,2025-08-29_22-37-40,261,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756499860,50.0,313200,28866.223863124847,93.85700798034668,261
+314400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 89071.642, 'num_steps_sampled': 314400, 'update_time_ms': 2.394, 'num_steps_trained': 314400, 'load_time_ms': 0.625, 'default': {'kl': 0.014541917480528355, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.741612434387207, 'total_loss': 17.651187896728516, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13121682405471802, 'vf_explained_var': 0.9867935180664062, 'vf_loss': 17.76767921447754}, 'grad_time_ms': 775.037}",3934253,28949.54998254776,-153.74686526915812,cda-server-6,24,-195.83850086707832,{},6288,10.157.146.6,{},-140.7502885744889,0,1200,2025-08-29_22-39-04,262,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756499944,50.0,314400,28949.54998254776,83.3261194229126,262
+315600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 88088.332, 'num_steps_sampled': 315600, 'update_time_ms': 2.525, 'num_steps_trained': 315600, 'load_time_ms': 0.613, 'default': {'kl': 0.013870678842067719, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.782343864440918, 'total_loss': 18.725406646728516, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12153424322605133, 'vf_explained_var': 0.9850756525993347, 'vf_loss': 18.832895278930664}, 'grad_time_ms': 772.561}",3934253,29035.42023253441,-153.9387682014451,cda-server-6,24,-195.83850086707832,{},6312,10.157.146.6,{},-140.7502885744889,0,1200,2025-08-29_22-40-30,263,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756500030,50.0,315600,29035.42023253441,85.87024998664856,263
+316800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 88319.086, 'num_steps_sampled': 316800, 'update_time_ms': 2.537, 'num_steps_trained': 316800, 'load_time_ms': 0.604, 'default': {'kl': 0.015589484013617039, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.815381050109863, 'total_loss': 45.58415222167969, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13468068838119507, 'vf_explained_var': 0.9687883853912354, 'vf_loss': 45.70304870605469}, 'grad_time_ms': 773.986}",3934253,29121.998387098312,-154.28611412240772,cda-server-6,24,-195.83850086707832,{},6336,10.157.146.6,{},-149.05647309909892,0,1200,2025-08-29_22-41-56,264,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756500116,50.0,316800,29121.998387098312,86.57815456390381,264
+318000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 88821.475, 'num_steps_sampled': 318000, 'update_time_ms': 2.55, 'num_steps_trained': 318000, 'load_time_ms': 0.607, 'default': {'kl': 0.015006310306489468, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.8653564453125, 'total_loss': 29.010637283325195, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13054674863815308, 'vf_explained_var': 0.9786375761032104, 'vf_loss': 29.125986099243164}, 'grad_time_ms': 780.571}",3934253,29212.850786685944,-154.1596053466124,cda-server-6,24,-176.4381663197646,{},6360,10.157.146.6,{},-148.19820052487748,0,1200,2025-08-29_22-43-27,265,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756500207,50.0,318000,29212.850786685944,90.85239958763123,265
+319200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 88283.819, 'num_steps_sampled': 319200, 'update_time_ms': 2.533, 'num_steps_trained': 319200, 'load_time_ms': 0.611, 'default': {'kl': 0.016107451170682907, 'cur_lr': 4.999999873689376e-05, 'entropy': 12.022677421569824, 'total_loss': 37.852230072021484, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1311512589454651, 'vf_explained_var': 0.974249541759491, 'vf_loss': 37.967071533203125}, 'grad_time_ms': 794.547}",3934253,29285.48614835739,-154.15685653819614,cda-server-6,24,-176.4381663197646,{},6384,10.157.146.6,{},-136.70630152775394,0,1200,2025-08-29_22-44-40,266,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756500280,50.0,319200,29285.48614835739,72.63536167144775,266
+320400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 88743.608, 'num_steps_sampled': 320400, 'update_time_ms': 2.52, 'num_steps_trained': 320400, 'load_time_ms': 0.61, 'default': {'kl': 0.014153753407299519, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.821681022644043, 'total_loss': 27.39217758178711, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12194425612688065, 'vf_explained_var': 0.9820153713226318, 'vf_loss': 27.499794006347656}, 'grad_time_ms': 793.183}",3934253,29384.31538414955,-153.96803814397418,cda-server-6,24,-176.4381663197646,{},6408,10.157.146.6,{},-136.70630152775394,0,1200,2025-08-29_22-46-18,267,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756500378,50.0,320400,29384.31538414955,98.82923579216003,267
+321600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 87746.756, 'num_steps_sampled': 321600, 'update_time_ms': 2.571, 'num_steps_trained': 321600, 'load_time_ms': 0.612, 'default': {'kl': 0.013911773450672626, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.718981742858887, 'total_loss': 40.44329071044922, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13097091019153595, 'vf_explained_var': 0.9710770845413208, 'vf_loss': 40.560176849365234}, 'grad_time_ms': 797.005}",3934253,29460.875306606293,-153.7755560748365,cda-server-6,24,-176.4381663197646,{},6432,10.157.146.6,{},-136.70630152775394,0,1200,2025-08-29_22-47-35,268,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756500455,50.0,321600,29460.875306606293,76.55992245674133,268
+322800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 84188.524, 'num_steps_sampled': 322800, 'update_time_ms': 2.593, 'num_steps_trained': 322800, 'load_time_ms': 0.607, 'default': {'kl': 0.016193203628063202, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.588141441345215, 'total_loss': 32.77817153930664, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13464587926864624, 'vf_explained_var': 0.9762402772903442, 'vf_loss': 32.896419525146484}, 'grad_time_ms': 806.453}",3934253,29528.51364827156,-153.4607327425086,cda-server-6,24,-171.40863771827642,{},6456,10.157.146.6,{},-136.70630152775394,0,1200,2025-08-29_22-48-43,269,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756500523,50.0,322800,29528.51364827156,67.63834166526794,269
+324000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 84408.661, 'num_steps_sampled': 324000, 'update_time_ms': 2.626, 'num_steps_trained': 324000, 'load_time_ms': 0.596, 'default': {'kl': 0.01516958698630333, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.557772636413574, 'total_loss': 23.42417335510254, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12869912385940552, 'vf_explained_var': 0.9817376732826233, 'vf_loss': 23.537513732910156}, 'grad_time_ms': 806.745}",3934253,29624.60574412346,-153.33992347144647,cda-server-6,24,-171.40863771827642,{},6480,10.157.146.6,{},-142.72273321439698,0,1200,2025-08-29_22-50-19,270,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756500619,50.0,324000,29624.60574412346,96.0920958518982,270
+325200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 81621.057, 'num_steps_sampled': 325200, 'update_time_ms': 2.665, 'num_steps_trained': 325200, 'load_time_ms': 0.606, 'default': {'kl': 0.0140716303139925, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.617931365966797, 'total_loss': 23.010072708129883, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1258062869310379, 'vf_explained_var': 0.9814040660858154, 'vf_loss': 23.121633529663086}, 'grad_time_ms': 807.68}",3934253,29690.5972969532,-153.17287745568458,cda-server-6,24,-171.40863771827642,{},6504,10.157.146.6,{},-142.72273321439698,0,1200,2025-08-29_22-51-25,271,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756500685,50.0,325200,29690.5972969532,65.99155282974243,271
+326400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 82244.568, 'num_steps_sampled': 326400, 'update_time_ms': 2.656, 'num_steps_trained': 326400, 'load_time_ms': 0.61, 'default': {'kl': 0.013999907299876213, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.388032913208008, 'total_loss': 18.105144500732422, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11585116386413574, 'vf_explained_var': 0.9850890040397644, 'vf_loss': 18.20682144165039}, 'grad_time_ms': 788.997}",3934253,29779.971660375595,-152.95437416580322,cda-server-6,24,-166.7981294945134,{},6528,10.157.146.6,{},-142.31050554669037,0,1200,2025-08-29_22-52-54,272,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756500774,50.0,326400,29779.971660375595,89.3743634223938,272
+327600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 80115.644, 'num_steps_sampled': 327600, 'update_time_ms': 2.58, 'num_steps_trained': 327600, 'load_time_ms': 0.618, 'default': {'kl': 0.014399628154933453, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.7221097946167, 'total_loss': 35.62514877319336, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12929335236549377, 'vf_explained_var': 0.9736959934234619, 'vf_loss': 35.739864349365234}, 'grad_time_ms': 793.414}",3934253,29844.59642982483,-153.57078695792043,cda-server-6,24,-180.0083391494624,{},6552,10.157.146.6,{},-142.31050554669037,0,1200,2025-08-29_22-53-59,273,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756500839,50.0,327600,29844.59642982483,64.62476944923401,273
+328800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 81093.68, 'num_steps_sampled': 328800, 'update_time_ms': 2.564, 'num_steps_trained': 328800, 'load_time_ms': 0.614, 'default': {'kl': 0.013647317886352539, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.217588424682617, 'total_loss': 42.10578918457031, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10328339040279388, 'vf_explained_var': 0.9690133333206177, 'vf_loss': 42.19525909423828}, 'grad_time_ms': 794.475}",3934253,29940.964215040207,-153.39609191716633,cda-server-6,24,-180.0083391494624,{},6576,10.157.146.6,{},-142.31050554669037,0,1200,2025-08-29_22-55-35,274,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756500935,50.0,328800,29940.964215040207,96.36778521537781,274
+330000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 82563.125, 'num_steps_sampled': 330000, 'update_time_ms': 2.6, 'num_steps_trained': 330000, 'load_time_ms': 0.606, 'default': {'kl': 0.014787460677325726, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.442232131958008, 'total_loss': 25.26143455505371, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11275593191385269, 'vf_explained_var': 0.9793703556060791, 'vf_loss': 25.359216690063477}, 'grad_time_ms': 794.462}",3934253,30046.511551856995,-153.3767483996174,cda-server-6,24,-180.0083391494624,{},6600,10.157.146.6,{},-142.31050554669037,0,1200,2025-08-29_22-57-21,275,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756501041,50.0,330000,30046.511551856995,105.54733681678772,275
+331200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 85483.028, 'num_steps_sampled': 331200, 'update_time_ms': 2.634, 'num_steps_trained': 331200, 'load_time_ms': 0.6, 'default': {'kl': 0.015072625130414963, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.500537872314453, 'total_loss': 21.24437141418457, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13162878155708313, 'vf_explained_var': 0.9849632978439331, 'vf_loss': 21.36073875427246}, 'grad_time_ms': 778.258}",3934253,30148.183248519897,-153.38630465945496,cda-server-6,24,-180.0083391494624,{},6624,10.157.146.6,{},-145.2196053826522,0,1200,2025-08-29_22-59-02,276,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756501142,50.0,331200,30148.183248519897,101.67169666290283,276
+332400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 84406.381, 'num_steps_sampled': 332400, 'update_time_ms': 2.596, 'num_steps_trained': 332400, 'load_time_ms': 0.6, 'default': {'kl': 0.014994761906564236, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.437612533569336, 'total_loss': 18.316537857055664, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12036796659231186, 'vf_explained_var': 0.9855210781097412, 'vf_loss': 18.421722412109375}, 'grad_time_ms': 761.188}",3934253,30236.075475215912,-152.83274076860297,cda-server-6,24,-165.82521908204325,{},6648,10.157.146.6,{},-145.2196053826522,0,1200,2025-08-29_23-00-30,277,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756501230,50.0,332400,30236.075475215912,87.8922266960144,277
+333600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 85115.306, 'num_steps_sampled': 333600, 'update_time_ms': 2.554, 'num_steps_trained': 333600, 'load_time_ms': 0.604, 'default': {'kl': 0.013223753310739994, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.365351676940918, 'total_loss': 25.293102264404297, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12207407504320145, 'vf_explained_var': 0.9820523858070374, 'vf_loss': 25.40178871154785}, 'grad_time_ms': 757.229}",3934253,30319.68391394615,-152.7504248056896,cda-server-6,24,-165.82521908204325,{},6672,10.157.146.6,{},-145.2196053826522,0,1200,2025-08-29_23-01-54,278,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756501314,50.0,333600,30319.68391394615,83.60843873023987,278
+334800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 85352.692, 'num_steps_sampled': 334800, 'update_time_ms': 2.507, 'num_steps_trained': 334800, 'load_time_ms': 0.601, 'default': {'kl': 0.010769886896014214, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.56566333770752, 'total_loss': 98.25940704345703, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10573761910200119, 'vf_explained_var': 0.9470511674880981, 'vf_loss': 98.354248046875}, 'grad_time_ms': 751.919}",3934253,30389.643027305603,-153.30131133278667,cda-server-6,24,-208.3227003464183,{},6696,10.157.146.6,{},-145.2196053826522,0,1200,2025-08-29_23-03-04,279,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756501384,50.0,334800,30389.643027305603,69.9591133594513,279
+336000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 85807.115, 'num_steps_sampled': 336000, 'update_time_ms': 2.518, 'num_steps_trained': 336000, 'load_time_ms': 0.604, 'default': {'kl': 0.015613911673426628, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.319066047668457, 'total_loss': 13.783968925476074, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12649664282798767, 'vf_explained_var': 0.9884146451950073, 'vf_loss': 13.89465618133545}, 'grad_time_ms': 742.985}",3934253,30490.19049167633,-153.73297503235312,cda-server-6,24,-208.3227003464183,{},6720,10.157.146.6,{},-145.24336047937695,0,1200,2025-08-29_23-04-44,280,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756501484,50.0,336000,30490.19049167633,100.54746437072754,280
+337200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 89366.171, 'num_steps_sampled': 337200, 'update_time_ms': 2.458, 'num_steps_trained': 337200, 'load_time_ms': 0.598, 'default': {'kl': 0.01393085066229105, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.271801948547363, 'total_loss': 63.68611526489258, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12504935264587402, 'vf_explained_var': 0.9535910487174988, 'vf_loss': 63.79706573486328}, 'grad_time_ms': 735.058}",3934253,30591.69241476059,-154.0611767016651,cda-server-6,24,-208.3227003464183,{},6744,10.157.146.6,{},-145.24336047937695,0,1200,2025-08-29_23-06-26,281,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756501586,50.0,337200,30591.69241476059,101.50192308425903,281
+338400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 89531.295, 'num_steps_sampled': 338400, 'update_time_ms': 2.404, 'num_steps_trained': 338400, 'load_time_ms': 0.6, 'default': {'kl': 0.01404589880257845, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.205625534057617, 'total_loss': 21.548248291015625, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12210464477539062, 'vf_explained_var': 0.9836018681526184, 'vf_loss': 21.656131744384766}, 'grad_time_ms': 748.553}",3934253,30682.852532863617,-154.06282255443577,cda-server-6,24,-208.3227003464183,{},6768,10.157.146.6,{},-146.79730571525536,0,1200,2025-08-29_23-07-57,282,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756501677,50.0,338400,30682.852532863617,91.16011810302734,282
+339600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93866.94, 'num_steps_sampled': 339600, 'update_time_ms': 2.363, 'num_steps_trained': 339600, 'load_time_ms': 0.602, 'default': {'kl': 0.013868219219148159, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.38871955871582, 'total_loss': 22.872215270996094, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12734149396419525, 'vf_explained_var': 0.9838337302207947, 'vf_loss': 22.985517501831055}, 'grad_time_ms': 741.84}",3934253,30790.766562223434,-153.490601175543,cda-server-6,24,-186.98396846066603,{},6792,10.157.146.6,{},-146.79730571525536,0,1200,2025-08-29_23-09-45,283,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756501785,50.0,339600,30790.766562223434,107.9140293598175,283
+340800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92716.905, 'num_steps_sampled': 340800, 'update_time_ms': 2.316, 'num_steps_trained': 340800, 'load_time_ms': 0.606, 'default': {'kl': 0.01389007456600666, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.565324783325195, 'total_loss': 53.0439453125, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12358132749795914, 'vf_explained_var': 0.9608864188194275, 'vf_loss': 53.15346145629883}, 'grad_time_ms': 743.247}",3934253,30875.64744758606,-153.8480949080955,cda-server-6,24,-186.98396846066603,{},6816,10.157.146.6,{},-146.79730571525536,0,1200,2025-08-29_23-11-10,284,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756501870,50.0,340800,30875.64744758606,84.88088536262512,284
+342000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 90635.975, 'num_steps_sampled': 342000, 'update_time_ms': 2.262, 'num_steps_trained': 342000, 'load_time_ms': 0.611, 'default': {'kl': 0.014106114394962788, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.181747436523438, 'total_loss': 36.09983825683594, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11997734010219574, 'vf_explained_var': 0.9704306125640869, 'vf_loss': 36.20553207397461}, 'grad_time_ms': 743.898}",3934253,30960.3914706707,-153.69546458851175,cda-server-6,24,-184.65606171714566,{},6840,10.157.146.6,{},-136.9307972088323,0,1200,2025-08-29_23-12-35,285,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756501955,50.0,342000,30960.3914706707,84.7440230846405,285
+343200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 90821.993, 'num_steps_sampled': 343200, 'update_time_ms': 2.237, 'num_steps_trained': 343200, 'load_time_ms': 0.614, 'default': {'kl': 0.014162329956889153, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.337715148925781, 'total_loss': 29.2408447265625, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13232357800006866, 'vf_explained_var': 0.9764517545700073, 'vf_loss': 29.35883331298828}, 'grad_time_ms': 762.872}",3934253,31064.113805532455,-153.9291175871248,cda-server-6,24,-184.65606171714566,{},6864,10.157.146.6,{},-136.9307972088323,0,1200,2025-08-29_23-14-19,286,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756502059,50.0,343200,31064.113805532455,103.72233486175537,286
+344400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 91537.605, 'num_steps_sampled': 344400, 'update_time_ms': 2.268, 'num_steps_trained': 344400, 'load_time_ms': 0.618, 'default': {'kl': 0.015963837504386902, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.328529357910156, 'total_loss': 17.16999053955078, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.131949320435524, 'vf_explained_var': 0.9855950474739075, 'vf_loss': 17.285778045654297}, 'grad_time_ms': 770.754}",3934253,31159.240578889847,-154.0807793508338,cda-server-6,24,-184.65606171714566,{},6888,10.157.146.6,{},-136.9307972088323,0,1200,2025-08-29_23-15-54,287,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756502154,50.0,344400,31159.240578889847,95.12677335739136,287
+345600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93518.384, 'num_steps_sampled': 345600, 'update_time_ms': 2.275, 'num_steps_trained': 345600, 'load_time_ms': 0.618, 'default': {'kl': 0.014267970807850361, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.001362800598145, 'total_loss': 29.061933517456055, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1258041262626648, 'vf_explained_var': 0.9764705300331116, 'vf_loss': 29.17329216003418}, 'grad_time_ms': 761.355}",3934253,31262.563413619995,-153.38289002657675,cda-server-6,24,-169.46693858971975,{},6912,10.157.146.6,{},-136.9307972088323,0,1200,2025-08-29_23-17-37,288,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756502257,50.0,345600,31262.563413619995,103.32283473014832,288
+346800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94912.713, 'num_steps_sampled': 346800, 'update_time_ms': 2.331, 'num_steps_trained': 346800, 'load_time_ms': 0.635, 'default': {'kl': 0.013144236989319324, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.226943016052246, 'total_loss': 24.29330062866211, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1239776462316513, 'vf_explained_var': 0.9844390153884888, 'vf_loss': 24.40397071838379}, 'grad_time_ms': 762.003}",3934253,31346.472144842148,-153.57627731987313,cda-server-6,24,-170.88801007674104,{},6936,10.157.146.6,{},-144.3950308917359,0,1200,2025-08-29_23-19-01,289,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756502341,50.0,346800,31346.472144842148,83.90873122215271,289
+348000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95128.303, 'num_steps_sampled': 348000, 'update_time_ms': 2.287, 'num_steps_trained': 348000, 'load_time_ms': 0.634, 'default': {'kl': 0.015516340732574463, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.11108112335205, 'total_loss': 22.668201446533203, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13011670112609863, 'vf_explained_var': 0.9813645482063293, 'vf_loss': 22.782609939575195}, 'grad_time_ms': 768.12}",3934253,31449.235904693604,-153.4808211215403,cda-server-6,24,-170.88801007674104,{},6960,10.157.146.6,{},-144.3950308917359,0,1200,2025-08-29_23-20-44,290,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756502444,50.0,348000,31449.235904693604,102.76375985145569,290
+349200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95007.017, 'num_steps_sampled': 349200, 'update_time_ms': 2.297, 'num_steps_trained': 349200, 'load_time_ms': 0.638, 'default': {'kl': 0.013895703479647636, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.218277931213379, 'total_loss': 53.47324752807617, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12617962062358856, 'vf_explained_var': 0.9618358612060547, 'vf_loss': 53.585357666015625}, 'grad_time_ms': 768.239}",3934253,31549.526314735413,-153.55709118338893,cda-server-6,24,-185.80293929008243,{},6984,10.157.146.6,{},-144.3950308917359,0,1200,2025-08-29_23-22-24,291,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756502544,50.0,349200,31549.526314735413,100.29041004180908,291
+350400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95377.448, 'num_steps_sampled': 350400, 'update_time_ms': 2.285, 'num_steps_trained': 350400, 'load_time_ms': 0.635, 'default': {'kl': 0.013131446205079556, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.09090805053711, 'total_loss': 13.908455848693848, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13256239891052246, 'vf_explained_var': 0.9884033203125, 'vf_loss': 14.027721405029297}, 'grad_time_ms': 772.362}",3934253,31644.43196439743,-153.58899087363505,cda-server-6,24,-185.80293929008243,{},7008,10.157.146.6,{},-144.3950308917359,0,1200,2025-08-29_23-23-59,292,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756502639,50.0,350400,31644.43196439743,94.90564966201782,292
+351600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95129.044, 'num_steps_sampled': 351600, 'update_time_ms': 2.323, 'num_steps_trained': 351600, 'load_time_ms': 0.626, 'default': {'kl': 0.01519844401627779, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.170174598693848, 'total_loss': 22.778303146362305, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1352260261774063, 'vf_explained_var': 0.9831691384315491, 'vf_loss': 22.89813995361328}, 'grad_time_ms': 772.593}",3934253,31749.863520383835,-153.29731566182426,cda-server-6,24,-185.80293929008243,{},7032,10.157.146.6,{},-145.8788879310617,0,1200,2025-08-29_23-25-44,293,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756502744,50.0,351600,31749.863520383835,105.43155598640442,293
+352800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 96644.061, 'num_steps_sampled': 352800, 'update_time_ms': 2.419, 'num_steps_trained': 352800, 'load_time_ms': 0.622, 'default': {'kl': 0.01330583542585373, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.097810745239258, 'total_loss': 36.368648529052734, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12001624703407288, 'vf_explained_var': 0.9719719290733337, 'vf_loss': 36.47519302368164}, 'grad_time_ms': 769.725}",3934253,31849.86645746231,-153.5049688801624,cda-server-6,24,-185.80293929008243,{},7056,10.157.146.6,{},-139.82080949651424,0,1200,2025-08-29_23-27-24,294,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756502844,50.0,352800,31849.86645746231,100.00293707847595,294
+354000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 96999.372, 'num_steps_sampled': 354000, 'update_time_ms': 2.436, 'num_steps_trained': 354000, 'load_time_ms': 0.627, 'default': {'kl': 0.015100941061973572, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.145347595214844, 'total_loss': 31.30360221862793, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12340303510427475, 'vf_explained_var': 0.9782091975212097, 'vf_loss': 31.41171646118164}, 'grad_time_ms': 767.442}",3934253,31938.141626119614,-153.3523614012057,cda-server-6,24,-181.32258316814773,{},7080,10.157.146.6,{},-139.15408264827664,0,1200,2025-08-29_23-28-53,295,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756502933,50.0,354000,31938.141626119614,88.27516865730286,295
+355200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94766.538, 'num_steps_sampled': 355200, 'update_time_ms': 2.442, 'num_steps_trained': 355200, 'load_time_ms': 0.634, 'default': {'kl': 0.015226011164486408, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.920625686645508, 'total_loss': 17.984262466430664, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12347279489040375, 'vf_explained_var': 0.9853192567825317, 'vf_loss': 18.092321395874023}, 'grad_time_ms': 759.294}",3934253,32019.453699350357,-153.4454585060366,cda-server-6,24,-181.32258316814773,{},7104,10.157.146.6,{},-139.15408264827664,0,1200,2025-08-29_23-30-14,296,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756503014,50.0,355200,32019.453699350357,81.31207323074341,296
+356400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92443.191, 'num_steps_sampled': 356400, 'update_time_ms': 2.467, 'num_steps_trained': 356400, 'load_time_ms': 0.664, 'default': {'kl': 0.014233733527362347, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.99919605255127, 'total_loss': 12.16675853729248, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1281324028968811, 'vf_explained_var': 0.99040287733078, 'vf_loss': 12.280479431152344}, 'grad_time_ms': 760.707}",3934253,32091.361676692963,-153.30487583861384,cda-server-6,24,-181.32258316814773,{},7128,10.157.146.6,{},-139.15408264827664,0,1200,2025-08-29_23-31-26,297,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756503086,50.0,356400,32091.361676692963,71.90797734260559,297
+357600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92704.121, 'num_steps_sampled': 357600, 'update_time_ms': 2.481, 'num_steps_trained': 357600, 'load_time_ms': 0.66, 'default': {'kl': 0.013451273553073406, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.026782989501953, 'total_loss': 21.95667266845703, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1233801320195198, 'vf_explained_var': 0.9825711846351624, 'vf_loss': 22.06643295288086}, 'grad_time_ms': 762.934}",3934253,32197.31569838524,-152.81307272750516,cda-server-6,24,-163.96797787962552,{},7152,10.157.146.6,{},-139.15408264827664,0,1200,2025-08-29_23-33-12,298,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756503192,50.0,357600,32197.31569838524,105.954021692276,298
+358800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94347.891, 'num_steps_sampled': 358800, 'update_time_ms': 2.435, 'num_steps_trained': 358800, 'load_time_ms': 0.649, 'default': {'kl': 0.015564335510134697, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.200122833251953, 'total_loss': 27.603986740112305, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14806872606277466, 'vf_explained_var': 0.9808406829833984, 'vf_loss': 27.73629379272461}, 'grad_time_ms': 767.521}",3934253,32297.707879304886,-152.7437017894222,cda-server-6,24,-164.73387901983173,{},7176,10.157.146.6,{},-139.15408264827664,0,1200,2025-08-29_23-34-52,299,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756503292,50.0,358800,32297.707879304886,100.39218091964722,299
+360000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93078.785, 'num_steps_sampled': 360000, 'update_time_ms': 2.486, 'num_steps_trained': 360000, 'load_time_ms': 0.657, 'default': {'kl': 0.014852085150778294, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.155905723571777, 'total_loss': 15.688905715942383, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14254923164844513, 'vf_explained_var': 0.9873740673065186, 'vf_loss': 15.81641674041748}, 'grad_time_ms': 766.199}",3934253,32387.767731428146,-153.14312093140904,cda-server-6,24,-169.91469154306978,{},7200,10.157.146.6,{},-140.8243464522184,0,1200,2025-08-29_23-36-22,300,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756503382,50.0,360000,32387.767731428146,90.0598521232605,300
+361200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92802.854, 'num_steps_sampled': 361200, 'update_time_ms': 2.522, 'num_steps_trained': 361200, 'load_time_ms': 0.655, 'default': {'kl': 0.016245905309915543, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.024404525756836, 'total_loss': 11.871007919311523, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13858658075332642, 'vf_explained_var': 0.99014812707901, 'vf_loss': 11.993144989013672}, 'grad_time_ms': 762.687}",3934253,32485.263649463654,-153.2274074502331,cda-server-6,24,-169.91469154306978,{},7224,10.157.146.6,{},-140.8243464522184,0,1200,2025-08-29_23-38-00,301,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756503480,50.0,361200,32485.263649463654,97.4959180355072,301
+362400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92911.094, 'num_steps_sampled': 362400, 'update_time_ms': 2.569, 'num_steps_trained': 362400, 'load_time_ms': 0.655, 'default': {'kl': 0.014216229319572449, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.159814834594727, 'total_loss': 35.544677734375, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12864679098129272, 'vf_explained_var': 0.9736400246620178, 'vf_loss': 35.658931732177734}, 'grad_time_ms': 762.43}",3934253,32581.249537229538,-153.71020029202208,cda-server-6,24,-169.91469154306978,{},7248,10.157.146.6,{},-149.21272310850614,0,1200,2025-08-29_23-39-36,302,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756503576,50.0,362400,32581.249537229538,95.9858877658844,302
+363600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 88828.401, 'num_steps_sampled': 363600, 'update_time_ms': 2.568, 'num_steps_trained': 363600, 'load_time_ms': 0.653, 'default': {'kl': 0.015200129710137844, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.10995101928711, 'total_loss': 23.112335205078125, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1372426450252533, 'vf_explained_var': 0.9830207824707031, 'vf_loss': 23.23418617248535}, 'grad_time_ms': 763.732}",3934253,32645.868771076202,-153.62237696956078,cda-server-6,24,-169.91469154306978,{},7272,10.157.146.6,{},-148.23228434829258,0,1200,2025-08-29_23-40-41,303,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756503641,50.0,363600,32645.868771076202,64.61923384666443,303
+364800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 89234.035, 'num_steps_sampled': 364800, 'update_time_ms': 2.594, 'num_steps_trained': 364800, 'load_time_ms': 0.651, 'default': {'kl': 0.014623595401644707, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.935812950134277, 'total_loss': 18.714929580688477, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12618975341320038, 'vf_explained_var': 0.985697329044342, 'vf_loss': 18.826313018798828}, 'grad_time_ms': 762.85}",3934253,32749.919049024582,-153.52869796702987,cda-server-6,24,-166.35021138292797,{},7296,10.157.146.6,{},-148.23228434829258,0,1200,2025-08-29_23-42-25,304,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756503745,50.0,364800,32749.919049024582,104.05027794837952,304
+366000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 87893.805, 'num_steps_sampled': 366000, 'update_time_ms': 2.593, 'num_steps_trained': 366000, 'load_time_ms': 0.643, 'default': {'kl': 0.015481146052479744, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.092779159545898, 'total_loss': 23.730798721313477, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14704284071922302, 'vf_explained_var': 0.9847856163978577, 'vf_loss': 23.86216926574707}, 'grad_time_ms': 753.279}",3934253,32824.69520068169,-154.023138854144,cda-server-6,24,-167.08198004963523,{},7320,10.157.146.6,{},-147.8016334886118,0,1200,2025-08-29_23-43-39,305,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756503819,50.0,366000,32824.69520068169,74.77615165710449,305
+367200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 89572.983, 'num_steps_sampled': 367200, 'update_time_ms': 2.602, 'num_steps_trained': 367200, 'load_time_ms': 0.638, 'default': {'kl': 0.013067873194813728, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.859930992126465, 'total_loss': 31.82198143005371, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14084021747112274, 'vf_explained_var': 0.9786883592605591, 'vf_loss': 31.949594497680664}, 'grad_time_ms': 726.482}",3934253,32922.53137564659,-153.78323260138052,cda-server-6,24,-167.08198004963523,{},7344,10.157.146.6,{},-147.8016334886118,0,1200,2025-08-29_23-45-17,306,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756503917,50.0,367200,32922.53137564659,97.83617496490479,306
+368400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93512.777, 'num_steps_sampled': 368400, 'update_time_ms': 2.596, 'num_steps_trained': 368400, 'load_time_ms': 0.604, 'default': {'kl': 0.014852987602353096, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.933476448059082, 'total_loss': 21.214004516601562, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13704806566238403, 'vf_explained_var': 0.9833498001098633, 'vf_loss': 21.336013793945312}, 'grad_time_ms': 711.308}",3934253,33033.6856508255,-153.88048444856662,cda-server-6,24,-170.91292767388077,{},7368,10.157.146.6,{},-147.8016334886118,0,1200,2025-08-29_23-47-08,307,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756504028,50.0,368400,33033.6856508255,111.1542751789093,307
+369600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 91802.15, 'num_steps_sampled': 369600, 'update_time_ms': 2.62, 'num_steps_trained': 369600, 'load_time_ms': 0.612, 'default': {'kl': 0.01284022256731987, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.249340057373047, 'total_loss': 48.84939193725586, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14142972230911255, 'vf_explained_var': 0.9649655818939209, 'vf_loss': 48.977821350097656}, 'grad_time_ms': 709.303}",3934253,33122.514219760895,-154.2712712317214,cda-server-6,24,-186.36841074023712,{},7392,10.157.146.6,{},-144.26847544598456,0,1200,2025-08-29_23-48-37,308,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756504117,50.0,369600,33122.514219760895,88.82856893539429,308
+370800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 90123.807, 'num_steps_sampled': 370800, 'update_time_ms': 2.665, 'num_steps_trained': 370800, 'load_time_ms': 0.604, 'default': {'kl': 0.013471885584294796, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.025983810424805, 'total_loss': 25.35476303100586, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13464468717575073, 'vf_explained_var': 0.9827299118041992, 'vf_loss': 25.475767135620117}, 'grad_time_ms': 703.0}",3934253,33206.06060504913,-153.94511450306916,cda-server-6,24,-186.36841074023712,{},7416,10.157.146.6,{},-142.45030726659775,0,1200,2025-08-29_23-50-01,309,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756504201,50.0,370800,33206.06060504913,83.54638528823853,309
+372000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 90496.011, 'num_steps_sampled': 372000, 'update_time_ms': 2.641, 'num_steps_trained': 372000, 'load_time_ms': 0.597, 'default': {'kl': 0.01515925396233797, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.127731323242188, 'total_loss': 23.858789443969727, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12599676847457886, 'vf_explained_var': 0.9824094772338867, 'vf_loss': 23.969438552856445}, 'grad_time_ms': 712.061}",3934253,33299.933065891266,-154.24905917335306,cda-server-6,24,-186.36841074023712,{},7440,10.157.146.6,{},-141.46524261832909,0,1200,2025-08-29_23-51-35,310,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756504295,50.0,372000,33299.933065891266,93.87246084213257,310
+373200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92302.612, 'num_steps_sampled': 373200, 'update_time_ms': 2.63, 'num_steps_trained': 373200, 'load_time_ms': 0.608, 'default': {'kl': 0.015349972993135452, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.800884246826172, 'total_loss': 13.16865348815918, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1375599354505539, 'vf_explained_var': 0.9889466762542725, 'vf_loss': 13.290670394897461}, 'grad_time_ms': 720.193}",3934253,33415.57654643059,-153.81849049903275,cda-server-6,24,-186.36841074023712,{},7464,10.157.146.6,{},-141.46524261832909,0,1200,2025-08-29_23-53-30,311,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756504410,50.0,373200,33415.57654643059,115.6434805393219,311
+374400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92804.591, 'num_steps_sampled': 374400, 'update_time_ms': 2.579, 'num_steps_trained': 374400, 'load_time_ms': 0.603, 'default': {'kl': 0.014131312258541584, 'cur_lr': 4.999999873689376e-05, 'entropy': 11.05422592163086, 'total_loss': 23.799354553222656, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13450416922569275, 'vf_explained_var': 0.9824861884117126, 'vf_loss': 23.91954803466797}, 'grad_time_ms': 710.768}",3934253,33516.487151145935,-153.691471397228,cda-server-6,24,-174.5455242556761,{},7488,10.157.146.6,{},-141.46524261832909,0,1200,2025-08-29_23-55-11,312,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756504511,50.0,374400,33516.487151145935,100.91060471534729,312
+375600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 96374.432, 'num_steps_sampled': 375600, 'update_time_ms': 2.526, 'num_steps_trained': 375600, 'load_time_ms': 0.606, 'default': {'kl': 0.014769317582249641, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.969765663146973, 'total_loss': 24.39408302307129, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1261298954486847, 'vf_explained_var': 0.980952799320221, 'vf_loss': 24.505258560180664}, 'grad_time_ms': 700.257}",3934253,33616.69808459282,-153.32532619977394,cda-server-6,24,-174.5455242556761,{},7512,10.157.146.6,{},-138.3540792562646,0,1200,2025-08-29_23-56-52,313,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756504612,50.0,375600,33616.69808459282,100.21093344688416,313
+376800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 96242.033, 'num_steps_sampled': 376800, 'update_time_ms': 2.497, 'num_steps_trained': 376800, 'load_time_ms': 0.606, 'default': {'kl': 0.012455091811716557, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.836710929870605, 'total_loss': 39.87970733642578, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10243361443281174, 'vf_explained_var': 0.9777176976203918, 'vf_loss': 39.96952819824219}, 'grad_time_ms': 692.382}",3934253,33719.34510588646,-153.10512817751962,cda-server-6,24,-174.5455242556761,{},7536,10.157.146.6,{},-138.3540792562646,0,1200,2025-08-29_23-58-34,314,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756504714,50.0,376800,33719.34510588646,102.64702129364014,314
+378000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 96745.536, 'num_steps_sampled': 378000, 'update_time_ms': 2.53, 'num_steps_trained': 378000, 'load_time_ms': 0.606, 'default': {'kl': 0.012768601067364216, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.945272445678711, 'total_loss': 48.44010925292969, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1169797033071518, 'vf_explained_var': 0.9687525629997253, 'vf_loss': 48.544151306152344}, 'grad_time_ms': 704.269}",3934253,33799.27585601807,-153.3753794364622,cda-server-6,24,-182.4550995827381,{},7560,10.157.146.6,{},-138.3540792562646,0,1200,2025-08-29_23-59-54,315,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756504794,50.0,378000,33799.27585601807,79.93075013160706,315
+379200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 97378.354, 'num_steps_sampled': 379200, 'update_time_ms': 2.497, 'num_steps_trained': 379200, 'load_time_ms': 0.603, 'default': {'kl': 0.014992697164416313, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.970458984375, 'total_loss': 37.55704116821289, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13573689758777618, 'vf_explained_var': 0.9713044762611389, 'vf_loss': 37.67759704589844}, 'grad_time_ms': 729.514}",3934253,33903.69194102287,-153.12148182322898,cda-server-6,24,-182.4550995827381,{},7584,10.157.146.6,{},-138.3540792562646,0,1200,2025-08-30_00-01-39,316,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756504899,50.0,379200,33903.69194102287,104.41608500480652,316
+380400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95875.166, 'num_steps_sampled': 380400, 'update_time_ms': 2.454, 'num_steps_trained': 380400, 'load_time_ms': 0.605, 'default': {'kl': 0.014862080104649067, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.844161987304688, 'total_loss': 21.56414222717285, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12421739101409912, 'vf_explained_var': 0.9830238819122314, 'vf_loss': 21.67331314086914}, 'grad_time_ms': 751.727}",3934253,34000.03533434868,-153.2457239279507,cda-server-6,24,-182.4550995827381,{},7608,10.157.146.6,{},-142.5929949692987,0,1200,2025-08-30_00-03-15,317,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756504995,50.0,380400,34000.03533434868,96.34339332580566,317
+381600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 97708.083, 'num_steps_sampled': 381600, 'update_time_ms': 2.406, 'num_steps_trained': 381600, 'load_time_ms': 0.629, 'default': {'kl': 0.014282830990850925, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.415968894958496, 'total_loss': 22.82317352294922, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1225418746471405, 'vf_explained_var': 0.9830620884895325, 'vf_loss': 22.931251525878906}, 'grad_time_ms': 754.055}",3934253,34107.21705150604,-152.96947395657688,cda-server-6,24,-182.4550995827381,{},7632,10.157.146.6,{},-143.11070441906222,0,1200,2025-08-30_00-05-02,318,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756505102,50.0,381600,34107.21705150604,107.18171715736389,318
+382800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 100184.811, 'num_steps_sampled': 382800, 'update_time_ms': 2.423, 'num_steps_trained': 382800, 'load_time_ms': 0.63, 'default': {'kl': 0.01591685228049755, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.475652694702148, 'total_loss': 11.753562927246094, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13239659368991852, 'vf_explained_var': 0.9901783466339111, 'vf_loss': 11.869844436645508}, 'grad_time_ms': 739.596}",3934253,34215.38590621948,-152.57093134687875,cda-server-6,24,-175.74868372203048,{},7656,10.157.146.6,{},-143.11070441906222,0,1200,2025-08-30_00-06-50,319,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756505210,50.0,382800,34215.38590621948,108.16885471343994,319
+384000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 101709.565, 'num_steps_sampled': 384000, 'update_time_ms': 2.429, 'num_steps_trained': 384000, 'load_time_ms': 0.636, 'default': {'kl': 0.014203101396560669, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.735393524169922, 'total_loss': 23.69377326965332, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12848956882953644, 'vf_explained_var': 0.9825847148895264, 'vf_loss': 23.80788230895996}, 'grad_time_ms': 735.134}",3934253,34324.46160006523,-152.58352243026727,cda-server-6,24,-175.74868372203048,{},7680,10.157.146.6,{},-143.11070441906222,0,1200,2025-08-30_00-08-39,320,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756505319,50.0,384000,34324.46160006523,109.0756938457489,320
+385200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 99365.659, 'num_steps_sampled': 385200, 'update_time_ms': 2.482, 'num_steps_trained': 385200, 'load_time_ms': 0.618, 'default': {'kl': 0.014924119226634502, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.55162525177002, 'total_loss': 19.39442253112793, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12613314390182495, 'vf_explained_var': 0.9840491414070129, 'vf_loss': 19.50544548034668}, 'grad_time_ms': 736.471}",3934253,34416.68057346344,-152.4897771954675,cda-server-6,24,-170.05123202179706,{},7704,10.157.146.6,{},-148.94070225783665,0,1200,2025-08-30_00-10-12,321,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756505412,50.0,385200,34416.68057346344,92.21897339820862,321
+386400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 101335.245, 'num_steps_sampled': 386400, 'update_time_ms': 2.512, 'num_steps_trained': 386400, 'load_time_ms': 0.619, 'default': {'kl': 0.012489722110331059, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.611146926879883, 'total_loss': 42.83867645263672, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12331356108188629, 'vf_explained_var': 0.9723660349845886, 'vf_loss': 42.94934844970703}, 'grad_time_ms': 738.868}",3934253,34537.31090283394,-153.15623692414303,cda-server-6,24,-180.00500045552593,{},7728,10.157.146.6,{},-148.94070225783665,0,1200,2025-08-30_00-12-12,322,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756505532,50.0,386400,34537.31090283394,120.63032937049866,322
+387600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 102548.43, 'num_steps_sampled': 387600, 'update_time_ms': 2.596, 'num_steps_trained': 387600, 'load_time_ms': 0.616, 'default': {'kl': 0.013788405805826187, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.14149284362793, 'total_loss': 22.343345642089844, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11049012094736099, 'vf_explained_var': 0.9818713068962097, 'vf_loss': 22.43987464904785}, 'grad_time_ms': 751.021}",3934253,34649.776156425476,-153.4625475023141,cda-server-6,24,-180.00500045552593,{},7752,10.157.146.6,{},-148.94070225783665,0,1200,2025-08-30_00-14-05,323,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756505645,50.0,387600,34649.776156425476,112.46525359153748,323
+388800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 100600.696, 'num_steps_sampled': 388800, 'update_time_ms': 2.617, 'num_steps_trained': 388800, 'load_time_ms': 0.621, 'default': {'kl': 0.015624160878360271, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.596100807189941, 'total_loss': 22.377880096435547, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1312115639448166, 'vf_explained_var': 0.9831936955451965, 'vf_loss': 22.49327278137207}, 'grad_time_ms': 759.895}",3934253,34733.0354244709,-153.77975317555422,cda-server-6,24,-180.00500045552593,{},7776,10.157.146.6,{},-148.94070225783665,0,1200,2025-08-30_00-15-28,324,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756505728,50.0,388800,34733.0354244709,83.25926804542542,324
+390000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 103876.041, 'num_steps_sampled': 390000, 'update_time_ms': 2.579, 'num_steps_trained': 390000, 'load_time_ms': 0.625, 'default': {'kl': 0.01323324628174305, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.411630630493164, 'total_loss': 44.34865188598633, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1147596687078476, 'vf_explained_var': 0.9732678532600403, 'vf_loss': 44.450016021728516}, 'grad_time_ms': 725.227}",3934253,34845.3717956543,-154.14158061826183,cda-server-6,24,-180.00500045552593,{},7800,10.157.146.6,{},-150.57069385002504,0,1200,2025-08-30_00-17-20,325,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756505840,50.0,390000,34845.3717956543,112.33637118339539,325
+391200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 101697.002, 'num_steps_sampled': 391200, 'update_time_ms': 2.57, 'num_steps_trained': 391200, 'load_time_ms': 0.628, 'default': {'kl': 0.012857540510594845, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.455910682678223, 'total_loss': 42.997108459472656, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10881756246089935, 'vf_explained_var': 0.9767987132072449, 'vf_loss': 43.09290313720703}, 'grad_time_ms': 731.369}",3934253,34928.06006979942,-154.09453792189086,cda-server-6,24,-173.1302892079539,{},7824,10.157.146.6,{},-150.75378690688086,0,1200,2025-08-30_00-18-43,326,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756505923,50.0,391200,34928.06006979942,82.68827414512634,326
+392400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 99412.19, 'num_steps_sampled': 392400, 'update_time_ms': 2.608, 'num_steps_trained': 392400, 'load_time_ms': 0.632, 'default': {'kl': 0.013225565664470196, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.42746353149414, 'total_loss': 30.64324951171875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13256524503231049, 'vf_explained_var': 0.9776370525360107, 'vf_loss': 30.76242446899414}, 'grad_time_ms': 726.856}",3934253,35001.51141524315,-154.43401371835216,cda-server-6,24,-180.4741776622837,{},7848,10.157.146.6,{},-150.64127333487605,0,1200,2025-08-30_00-19-57,327,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756505997,50.0,392400,35001.51141524315,73.45134544372559,327
+393600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 98206.757, 'num_steps_sampled': 393600, 'update_time_ms': 2.612, 'num_steps_trained': 393600, 'load_time_ms': 0.598, 'default': {'kl': 0.013027322478592396, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.334811210632324, 'total_loss': 25.57097053527832, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11095554381608963, 'vf_explained_var': 0.9810521006584167, 'vf_loss': 25.668737411499023}, 'grad_time_ms': 726.985}",3934253,35096.638957738876,-154.35407762027717,cda-server-6,24,-180.4741776622837,{},7872,10.157.146.6,{},-150.64127333487605,0,1200,2025-08-30_00-21-32,328,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756506092,50.0,393600,35096.638957738876,95.12754249572754,328
+394800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 99099.85, 'num_steps_sampled': 394800, 'update_time_ms': 2.616, 'num_steps_trained': 394800, 'load_time_ms': 0.609, 'default': {'kl': 0.015124778263270855, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.097905158996582, 'total_loss': 23.35348129272461, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1195986419916153, 'vf_explained_var': 0.9812294840812683, 'vf_loss': 23.457765579223633}, 'grad_time_ms': 747.878}",3934253,35213.948383808136,-153.77713772000587,cda-server-6,24,-180.4741776622837,{},7896,10.157.146.6,{},-136.8694429954124,0,1200,2025-08-30_00-23-29,329,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756506209,50.0,394800,35213.948383808136,117.30942606925964,329
+396000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 96400.676, 'num_steps_sampled': 396000, 'update_time_ms': 2.616, 'num_steps_trained': 396000, 'load_time_ms': 0.608, 'default': {'kl': 0.014633645303547382, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.538222312927246, 'total_loss': 20.841421127319336, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12716291844844818, 'vf_explained_var': 0.9844285249710083, 'vf_loss': 20.953765869140625}, 'grad_time_ms': 746.647}",3934253,35296.019594192505,-153.28856495343746,cda-server-6,24,-180.4741776622837,{},7920,10.157.146.6,{},-136.8694429954124,0,1200,2025-08-30_00-24-51,330,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756506291,50.0,396000,35296.019594192505,82.0712103843689,330
+397200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 97662.735, 'num_steps_sampled': 397200, 'update_time_ms': 2.72, 'num_steps_trained': 397200, 'load_time_ms': 0.609, 'default': {'kl': 0.014507361687719822, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.390003204345703, 'total_loss': 28.46442413330078, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1269027292728424, 'vf_explained_var': 0.9785017371177673, 'vf_loss': 28.57663917541504}, 'grad_time_ms': 737.279}",3934253,35400.76520228386,-152.84106423066166,cda-server-6,24,-180.4741776622837,{},7944,10.157.146.6,{},-135.7076686254385,0,1200,2025-08-30_00-26-36,331,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756506396,50.0,397200,35400.76520228386,104.74560809135437,331
+398400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94954.544, 'num_steps_sampled': 398400, 'update_time_ms': 2.706, 'num_steps_trained': 398400, 'load_time_ms': 0.621, 'default': {'kl': 0.014371686615049839, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.481554985046387, 'total_loss': 26.985797882080078, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13098128139972687, 'vf_explained_var': 0.9793742299079895, 'vf_loss': 27.10222816467285}, 'grad_time_ms': 742.071}",3934253,35494.36348748207,-152.3610456543385,cda-server-6,24,-166.77579605740746,{},7968,10.157.146.6,{},-135.7076686254385,0,1200,2025-08-30_00-28-10,332,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756506490,50.0,398400,35494.36348748207,93.59828519821167,332
+399600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 91593.414, 'num_steps_sampled': 399600, 'update_time_ms': 2.679, 'num_steps_trained': 399600, 'load_time_ms': 0.625, 'default': {'kl': 0.013958621770143509, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.51937484741211, 'total_loss': 40.451904296875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13136720657348633, 'vf_explained_var': 0.9713349938392639, 'vf_loss': 40.56913757324219}, 'grad_time_ms': 744.421}",3934253,35573.24069619179,-152.6889494291554,cda-server-6,24,-177.64100823331634,{},7992,10.157.146.6,{},-135.7076686254385,0,1200,2025-08-30_00-29-28,333,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756506568,50.0,399600,35573.24069619179,78.8772087097168,333
+400800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 91368.618, 'num_steps_sampled': 400800, 'update_time_ms': 2.637, 'num_steps_trained': 400800, 'load_time_ms': 0.628, 'default': {'kl': 0.015249352902173996, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.27700424194336, 'total_loss': 21.162511825561523, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11858128011226654, 'vf_explained_var': 0.9836852550506592, 'vf_loss': 21.26565170288086}, 'grad_time_ms': 752.607}",3934253,35654.33391952515,-152.6987609356839,cda-server-6,24,-177.64100823331634,{},8016,10.157.146.6,{},-135.7076686254385,0,1200,2025-08-30_00-30-50,334,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756506650,50.0,400800,35654.33391952515,81.09322333335876,334
+402000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 88729.159, 'num_steps_sampled': 402000, 'update_time_ms': 2.675, 'num_steps_trained': 402000, 'load_time_ms': 0.63, 'default': {'kl': 0.013706881552934647, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.258893013000488, 'total_loss': 18.555627822875977, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12699751555919647, 'vf_explained_var': 0.986332356929779, 'vf_loss': 18.668746948242188}, 'grad_time_ms': 788.154}",3934253,35740.63249707222,-152.9703099260085,cda-server-6,24,-177.64100823331634,{},8040,10.157.146.6,{},-142.11140543958143,0,1200,2025-08-30_00-32-16,335,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756506736,50.0,402000,35740.63249707222,86.29857754707336,335
+403200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 91513.753, 'num_steps_sampled': 403200, 'update_time_ms': 2.708, 'num_steps_trained': 403200, 'load_time_ms': 0.627, 'default': {'kl': 0.013812141492962837, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.123869895935059, 'total_loss': 17.128286361694336, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10887904465198517, 'vf_explained_var': 0.9872063398361206, 'vf_loss': 17.223176956176758}, 'grad_time_ms': 783.817}",3934253,35851.12422943115,-153.04831488940408,cda-server-6,24,-177.64100823331634,{},8064,10.157.146.6,{},-142.11140543958143,0,1200,2025-08-30_00-34-06,336,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756506846,50.0,403200,35851.12422943115,110.4917323589325,336
+404400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92711.829, 'num_steps_sampled': 404400, 'update_time_ms': 2.691, 'num_steps_trained': 404400, 'load_time_ms': 0.634, 'default': {'kl': 0.013465446420013905, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.17501449584961, 'total_loss': 22.101633071899414, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12934455275535583, 'vf_explained_var': 0.9826427102088928, 'vf_loss': 22.217344284057617}, 'grad_time_ms': 781.355}",3934253,35936.53139543533,-152.91974841361036,cda-server-6,24,-167.6798048261915,{},8088,10.157.146.6,{},-144.01814896022987,0,1200,2025-08-30_00-35-32,337,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756506932,50.0,404400,35936.53139543533,85.40716600418091,337
+405600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94088.489, 'num_steps_sampled': 405600, 'update_time_ms': 2.633, 'num_steps_trained': 405600, 'load_time_ms': 0.652, 'default': {'kl': 0.01327629666775465, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.075685501098633, 'total_loss': 20.67936897277832, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.128595769405365, 'vf_explained_var': 0.9839978814125061, 'vf_loss': 20.79452133178711}, 'grad_time_ms': 781.972}",3934253,36045.43131017685,-152.76994038362417,cda-server-6,24,-171.73506361888798,{},8112,10.157.146.6,{},-144.01814896022987,0,1200,2025-08-30_00-37-21,338,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756507041,50.0,405600,36045.43131017685,108.89991474151611,338
+406800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 91540.836, 'num_steps_sampled': 406800, 'update_time_ms': 2.577, 'num_steps_trained': 406800, 'load_time_ms': 0.653, 'default': {'kl': 0.01496865227818489, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.436251640319824, 'total_loss': 32.833805084228516, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12757453322410583, 'vf_explained_var': 0.9752024412155151, 'vf_loss': 32.946224212646484}, 'grad_time_ms': 782.538}",3934253,36137.26930594444,-152.87661447578267,cda-server-6,24,-178.246255970889,{},8136,10.157.146.6,{},-139.1453355829173,0,1200,2025-08-30_00-38-53,339,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756507133,50.0,406800,36137.26930594444,91.83799576759338,339
+408000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 91825.392, 'num_steps_sampled': 408000, 'update_time_ms': 2.581, 'num_steps_trained': 408000, 'load_time_ms': 0.647, 'default': {'kl': 0.014040197245776653, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.187368392944336, 'total_loss': 21.68220329284668, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13107901811599731, 'vf_explained_var': 0.9829478859901428, 'vf_loss': 21.799068450927734}, 'grad_time_ms': 790.481}",3934253,36222.26623415947,-152.5864977452388,cda-server-6,24,-178.246255970889,{},8160,10.157.146.6,{},-139.1453355829173,0,1200,2025-08-30_00-40-18,340,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756507218,50.0,408000,36222.26623415947,84.99692821502686,340
+409200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92400.945, 'num_steps_sampled': 409200, 'update_time_ms': 2.409, 'num_steps_trained': 409200, 'load_time_ms': 0.681, 'default': {'kl': 0.014228183776140213, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.1898193359375, 'total_loss': 19.298744201660156, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11869990825653076, 'vf_explained_var': 0.9837184548377991, 'vf_loss': 19.403038024902344}, 'grad_time_ms': 792.406}",3934253,36332.785865306854,-152.8219143853639,cda-server-6,24,-178.246255970889,{},8184,10.157.146.6,{},-139.1453355829173,0,1200,2025-08-30_00-42-08,341,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756507328,50.0,409200,36332.785865306854,110.51963114738464,341
+410400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 91987.235, 'num_steps_sampled': 410400, 'update_time_ms': 2.385, 'num_steps_trained': 410400, 'load_time_ms': 0.673, 'default': {'kl': 0.013174712657928467, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.154784202575684, 'total_loss': 17.16404914855957, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13054805994033813, 'vf_explained_var': 0.9870219826698303, 'vf_loss': 17.281259536743164}, 'grad_time_ms': 783.421}",3934253,36422.15473651886,-152.79249832994313,cda-server-6,24,-178.246255970889,{},8208,10.157.146.6,{},-139.1453355829173,0,1200,2025-08-30_00-43-38,342,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756507418,50.0,410400,36422.15473651886,89.36887121200562,342
+411600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93259.94, 'num_steps_sampled': 411600, 'update_time_ms': 2.39, 'num_steps_trained': 411600, 'load_time_ms': 0.682, 'default': {'kl': 0.013398093171417713, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.21140193939209, 'total_loss': 15.42952823638916, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12429417669773102, 'vf_explained_var': 0.988605260848999, 'vf_loss': 15.54025650024414}, 'grad_time_ms': 788.74}",3934253,36513.812532663345,-152.4947968460862,cda-server-6,24,-164.25618485757914,{},8232,10.157.146.6,{},-147.63720264870892,0,1200,2025-08-30_00-45-09,343,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756507509,50.0,411600,36513.812532663345,91.65779614448547,343
+412800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94727.256, 'num_steps_sampled': 412800, 'update_time_ms': 2.402, 'num_steps_trained': 412800, 'load_time_ms': 0.673, 'default': {'kl': 0.015052050352096558, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.440613746643066, 'total_loss': 25.006467819213867, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1175057590007782, 'vf_explained_var': 0.9807634353637695, 'vf_loss': 25.108734130859375}, 'grad_time_ms': 784.412}",3934253,36609.53568506241,-153.21520828059778,cda-server-6,24,-168.82503659059702,{},8256,10.157.146.6,{},-147.63720264870892,0,1200,2025-08-30_00-46-45,344,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756507605,50.0,412800,36609.53568506241,95.72315239906311,344
+414000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 96656.326, 'num_steps_sampled': 414000, 'update_time_ms': 2.368, 'num_steps_trained': 414000, 'load_time_ms': 0.668, 'default': {'kl': 0.015476263128221035, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.44300651550293, 'total_loss': 12.492606163024902, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12842413783073425, 'vf_explained_var': 0.989512026309967, 'vf_loss': 12.605360984802246}, 'grad_time_ms': 781.723}",3934253,36715.097074747086,-153.3844868213551,cda-server-6,24,-168.82503659059702,{},8280,10.157.146.6,{},-147.63720264870892,0,1200,2025-08-30_00-48-30,345,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756507710,50.0,414000,36715.097074747086,105.56138968467712,345
+415200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94261.095, 'num_steps_sampled': 415200, 'update_time_ms': 2.333, 'num_steps_trained': 415200, 'load_time_ms': 0.67, 'default': {'kl': 0.013878900557756424, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.181175231933594, 'total_loss': 18.723909378051758, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1245008334517479, 'vf_explained_var': 0.9861525297164917, 'vf_loss': 18.83435821533203}, 'grad_time_ms': 768.491}",3934253,36801.50307202339,-153.39538590524927,cda-server-6,24,-168.82503659059702,{},8304,10.157.146.6,{},-147.63720264870892,0,1200,2025-08-30_00-49-57,346,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756507797,50.0,415200,36801.50307202339,86.40599727630615,346
+416400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95108.192, 'num_steps_sampled': 416400, 'update_time_ms': 2.361, 'num_steps_trained': 416400, 'load_time_ms': 0.662, 'default': {'kl': 0.014218274503946304, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.210870742797852, 'total_loss': 18.950908660888672, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12428196519613266, 'vf_explained_var': 0.9845414757728577, 'vf_loss': 19.060794830322266}, 'grad_time_ms': 764.217}",3934253,36895.33891892433,-153.16668440198112,cda-server-6,24,-168.82503659059702,{},8328,10.157.146.6,{},-148.03892181301913,0,1200,2025-08-30_00-51-31,347,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756507891,50.0,416400,36895.33891892433,93.83584690093994,347
+417600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 91907.856, 'num_steps_sampled': 417600, 'update_time_ms': 2.428, 'num_steps_trained': 417600, 'load_time_ms': 0.654, 'default': {'kl': 0.014065904542803764, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.311721801757812, 'total_loss': 22.437252044677734, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12689092755317688, 'vf_explained_var': 0.983727216720581, 'vf_loss': 22.549901962280273}, 'grad_time_ms': 771.482}",3934253,36972.30895447731,-152.7423944307145,cda-server-6,24,-165.470864728126,{},8352,10.157.146.6,{},-149.157812667166,0,1200,2025-08-30_00-52-48,348,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756507968,50.0,417600,36972.30895447731,76.97003555297852,348
+418800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92743.246, 'num_steps_sampled': 418800, 'update_time_ms': 2.443, 'num_steps_trained': 418800, 'load_time_ms': 0.646, 'default': {'kl': 0.014622226357460022, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.095756530761719, 'total_loss': 13.963083267211914, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13011474907398224, 'vf_explained_var': 0.9886112213134766, 'vf_loss': 14.078393936157227}, 'grad_time_ms': 761.339}",3934253,37072.39999341965,-152.570437889023,cda-server-6,24,-168.57609319041728,{},8376,10.157.146.6,{},-150.24807205629406,0,1200,2025-08-30_00-54-28,349,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756508068,50.0,418800,37072.39999341965,100.09103894233704,349
+420000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93945.555, 'num_steps_sampled': 420000, 'update_time_ms': 2.414, 'num_steps_trained': 420000, 'load_time_ms': 0.651, 'default': {'kl': 0.013052679598331451, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.948760986328125, 'total_loss': 26.701265335083008, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.111075259745121, 'vf_explained_var': 0.9798588156700134, 'vf_loss': 26.799123764038086}, 'grad_time_ms': 760.168}",3934253,37169.40801501274,-152.35406502911871,cda-server-6,24,-168.57609319041728,{},8400,10.157.146.6,{},-142.43713855171399,0,1200,2025-08-30_00-56-05,350,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756508165,50.0,420000,37169.40801501274,97.00802159309387,350
+421200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93551.288, 'num_steps_sampled': 421200, 'update_time_ms': 2.427, 'num_steps_trained': 421200, 'load_time_ms': 0.614, 'default': {'kl': 0.013322807848453522, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.31839370727539, 'total_loss': 39.58547592163086, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12193938344717026, 'vf_explained_var': 0.9701064229011536, 'vf_loss': 39.69392395019531}, 'grad_time_ms': 766.501}",3934253,37276.0480325222,-152.9349523520042,cda-server-6,24,-182.25825795156348,{},8424,10.157.146.6,{},-142.43713855171399,0,1200,2025-08-30_00-57-52,351,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756508272,50.0,421200,37276.0480325222,106.64001750946045,351
+422400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 96615.82, 'num_steps_sampled': 422400, 'update_time_ms': 2.446, 'num_steps_trained': 422400, 'load_time_ms': 0.613, 'default': {'kl': 0.014840834774076939, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.174718856811523, 'total_loss': 31.508209228515625, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1351870745420456, 'vf_explained_var': 0.9790176749229431, 'vf_loss': 31.62837028503418}, 'grad_time_ms': 769.346}",3934253,37396.09178161621,-153.35956760196896,cda-server-6,24,-182.25825795156348,{},8448,10.157.146.6,{},-142.43713855171399,0,1200,2025-08-30_00-59-52,352,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756508392,50.0,422400,37396.09178161621,120.0437490940094,352
+423600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 97717.845, 'num_steps_sampled': 423600, 'update_time_ms': 2.44, 'num_steps_trained': 423600, 'load_time_ms': 0.605, 'default': {'kl': 0.014833922497928143, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.910870552062988, 'total_loss': 21.269311904907227, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1376529335975647, 'vf_explained_var': 0.9843950271606445, 'vf_loss': 21.391944885253906}, 'grad_time_ms': 759.658}",3934253,37498.67289829254,-153.08415396170028,cda-server-6,24,-182.25825795156348,{},8472,10.157.146.6,{},-142.43713855171399,0,1200,2025-08-30_01-01-34,353,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756508494,50.0,423600,37498.67289829254,102.58111667633057,353
+424800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 98199.752, 'num_steps_sampled': 424800, 'update_time_ms': 2.418, 'num_steps_trained': 424800, 'load_time_ms': 0.604, 'default': {'kl': 0.01393041666597128, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.231510162353516, 'total_loss': 19.14379119873047, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1328810304403305, 'vf_explained_var': 0.9846649169921875, 'vf_loss': 19.262569427490234}, 'grad_time_ms': 751.339}",3934253,37599.13117814064,-153.2069426241487,cda-server-6,24,-182.25825795156348,{},8496,10.157.146.6,{},-139.1586138095392,0,1200,2025-08-30_01-03-15,354,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756508595,50.0,424800,37599.13117814064,100.45827984809875,354
+426000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 96474.088, 'num_steps_sampled': 426000, 'update_time_ms': 2.442, 'num_steps_trained': 426000, 'load_time_ms': 0.606, 'default': {'kl': 0.013703294098377228, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.181726455688477, 'total_loss': 22.11202621459961, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12982912361621857, 'vf_explained_var': 0.9826943278312683, 'vf_loss': 22.227983474731445}, 'grad_time_ms': 754.057}",3934253,37687.463785886765,-152.87976951541432,cda-server-6,24,-178.5151443402442,{},8520,10.157.146.6,{},-139.1586138095392,0,1200,2025-08-30_01-04-43,355,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756508683,50.0,426000,37687.463785886765,88.33260774612427,355
+427200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 98364.902, 'num_steps_sampled': 427200, 'update_time_ms': 2.483, 'num_steps_trained': 427200, 'load_time_ms': 0.604, 'default': {'kl': 0.014798227697610855, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.146353721618652, 'total_loss': 19.512731552124023, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11665691435337067, 'vf_explained_var': 0.9837243556976318, 'vf_loss': 19.614402770996094}, 'grad_time_ms': 762.176}",3934253,37792.859236478806,-152.4326080480917,cda-server-6,24,-168.48851998476675,{},8544,10.157.146.6,{},-139.1586138095392,0,1200,2025-08-30_01-06-28,356,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756508788,50.0,427200,37792.859236478806,105.39545059204102,356
+428400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 96947.825, 'num_steps_sampled': 428400, 'update_time_ms': 2.416, 'num_steps_trained': 428400, 'load_time_ms': 0.608, 'default': {'kl': 0.014719611965119839, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.013218879699707, 'total_loss': 15.863059043884277, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13711626827716827, 'vf_explained_var': 0.9880774021148682, 'vf_loss': 15.985271453857422}, 'grad_time_ms': 772.157}",3934253,37872.6226978302,-152.81097276852893,cda-server-6,24,-168.48851998476675,{},8568,10.157.146.6,{},-139.1586138095392,0,1200,2025-08-30_01-07-48,357,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756508868,50.0,428400,37872.6226978302,79.76346135139465,357
+429600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 98484.83, 'num_steps_sampled': 429600, 'update_time_ms': 2.418, 'num_steps_trained': 429600, 'load_time_ms': 0.595, 'default': {'kl': 0.013437781482934952, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.048007011413574, 'total_loss': 26.254295349121094, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1137915551662445, 'vf_explained_var': 0.9829705357551575, 'vf_loss': 26.354480743408203}, 'grad_time_ms': 767.708}",3934253,37964.91802740097,-152.98573683136482,cda-server-6,24,-168.48851998476675,{},8592,10.157.146.6,{},-143.0228323504369,0,1200,2025-08-30_01-09-20,358,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756508960,50.0,429600,37964.91802740097,92.29532957077026,358
+430800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 98218.345, 'num_steps_sampled': 430800, 'update_time_ms': 2.451, 'num_steps_trained': 430800, 'load_time_ms': 0.597, 'default': {'kl': 0.013722885400056839, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.970488548278809, 'total_loss': 13.448766708374023, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1254318505525589, 'vf_explained_var': 0.9887028932571411, 'vf_loss': 13.56030559539795}, 'grad_time_ms': 777.334}",3934253,38062.439425468445,-152.59503919828575,cda-server-6,24,-163.3151418152035,{},8616,10.157.146.6,{},-143.94562985426637,0,1200,2025-08-30_01-10-58,359,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756509058,50.0,430800,38062.439425468445,97.52139806747437,359
+432000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 98022.492, 'num_steps_sampled': 432000, 'update_time_ms': 2.485, 'num_steps_trained': 432000, 'load_time_ms': 0.595, 'default': {'kl': 0.01442575454711914, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.238739013671875, 'total_loss': 29.425323486328125, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13105913996696472, 'vf_explained_var': 0.9780151844024658, 'vf_loss': 29.541778564453125}, 'grad_time_ms': 778.193}",3934253,38157.49730968475,-152.59521854700185,cda-server-6,24,-163.75715808807124,{},8640,10.157.146.6,{},-143.94562985426637,0,1200,2025-08-30_01-12-33,360,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756509153,50.0,432000,38157.49730968475,95.0578842163086,360
+433200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 96048.622, 'num_steps_sampled': 433200, 'update_time_ms': 2.476, 'num_steps_trained': 433200, 'load_time_ms': 0.606, 'default': {'kl': 0.01245577447116375, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.976419448852539, 'total_loss': 17.008989334106445, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12027224898338318, 'vf_explained_var': 0.9869714379310608, 'vf_loss': 17.11665153503418}, 'grad_time_ms': 785.689}",3934253,38244.47419548035,-152.5972637993256,cda-server-6,24,-164.38796960241405,{},8664,10.157.146.6,{},-143.94562985426637,0,1200,2025-08-30_01-14-00,361,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756509240,50.0,433200,38244.47419548035,86.97688579559326,361
+434400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94103.607, 'num_steps_sampled': 434400, 'update_time_ms': 2.541, 'num_steps_trained': 434400, 'load_time_ms': 0.606, 'default': {'kl': 0.013436969369649887, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.918680191040039, 'total_loss': 33.949283599853516, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12893246114253998, 'vf_explained_var': 0.9761844277381897, 'vf_loss': 34.06460952758789}, 'grad_time_ms': 782.457}",3934253,38345.03595113754,-152.69931875687953,cda-server-6,24,-170.43808917486143,{},8688,10.157.146.6,{},-143.0843494317296,0,1200,2025-08-30_01-15-41,362,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756509341,50.0,434400,38345.03595113754,100.56175565719604,362
+435600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94747.806, 'num_steps_sampled': 435600, 'update_time_ms': 2.617, 'num_steps_trained': 435600, 'load_time_ms': 0.609, 'default': {'kl': 0.015112587250769138, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.319666862487793, 'total_loss': 37.49136734008789, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1251940131187439, 'vf_explained_var': 0.9763219356536865, 'vf_loss': 37.601261138916016}, 'grad_time_ms': 791.851}",3934253,38454.15379524231,-153.22232896328572,cda-server-6,24,-175.87434224939994,{},8712,10.157.146.6,{},-143.0843494317296,0,1200,2025-08-30_01-17-30,363,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756509450,50.0,435600,38454.15379524231,109.11784410476685,363
+436800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93394.588, 'num_steps_sampled': 436800, 'update_time_ms': 2.647, 'num_steps_trained': 436800, 'load_time_ms': 0.609, 'default': {'kl': 0.013049306347966194, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.128003120422363, 'total_loss': 40.55475997924805, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12103336304426193, 'vf_explained_var': 0.9705398082733154, 'vf_loss': 40.66258239746094}, 'grad_time_ms': 798.195}",3934253,38541.14335441589,-153.27688113916284,cda-server-6,24,-175.87434224939994,{},8736,10.157.146.6,{},-143.0843494317296,0,1200,2025-08-30_01-18-57,364,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756509537,50.0,436800,38541.14335441589,86.98955917358398,364
+438000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95569.423, 'num_steps_sampled': 438000, 'update_time_ms': 2.643, 'num_steps_trained': 438000, 'load_time_ms': 0.604, 'default': {'kl': 0.013601518236100674, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.885064125061035, 'total_loss': 24.32900619506836, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13153356313705444, 'vf_explained_var': 0.9816988706588745, 'vf_loss': 24.446767807006836}, 'grad_time_ms': 779.284}",3934253,38651.03580594063,-153.14458819004005,cda-server-6,24,-175.87434224939994,{},8760,10.157.146.6,{},-143.0843494317296,0,1200,2025-08-30_01-20-47,365,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756509647,50.0,438000,38651.03580594063,109.8924515247345,365
+439200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94534.394, 'num_steps_sampled': 439200, 'update_time_ms': 2.634, 'num_steps_trained': 439200, 'load_time_ms': 0.62, 'default': {'kl': 0.01434319093823433, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.187789916992188, 'total_loss': 27.139606475830078, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1373453140258789, 'vf_explained_var': 0.9785805940628052, 'vf_loss': 27.26243019104004}, 'grad_time_ms': 784.596}",3934253,38746.134162187576,-153.03023594593262,cda-server-6,24,-175.87434224939994,{},8784,10.157.146.6,{},-145.97616584542013,0,1200,2025-08-30_01-22-22,366,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756509742,50.0,439200,38746.134162187576,95.09835624694824,366
+440400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95815.865, 'num_steps_sampled': 440400, 'update_time_ms': 2.681, 'num_steps_trained': 440400, 'load_time_ms': 0.614, 'default': {'kl': 0.01304242480546236, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.783220291137695, 'total_loss': 21.39423179626465, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1273837685585022, 'vf_explained_var': 0.9835090637207031, 'vf_loss': 21.50840950012207}, 'grad_time_ms': 777.45}",3934253,38838.64204645157,-152.98505145091403,cda-server-6,24,-175.1536698558524,{},8808,10.157.146.6,{},-147.98162832608875,0,1200,2025-08-30_01-23-54,367,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756509834,50.0,440400,38838.64204645157,92.50788426399231,367
+441600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95877.34, 'num_steps_sampled': 441600, 'update_time_ms': 2.732, 'num_steps_trained': 441600, 'load_time_ms': 0.625, 'default': {'kl': 0.014993922784924507, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.211225509643555, 'total_loss': 30.539302825927734, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13054004311561584, 'vf_explained_var': 0.9768690466880798, 'vf_loss': 30.654659271240234}, 'grad_time_ms': 774.471}",3934253,38931.52576327324,-153.22040865837252,cda-server-6,24,-175.1536698558524,{},8832,10.157.146.6,{},-147.98162832608875,0,1200,2025-08-30_01-25-27,368,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756509927,50.0,441600,38931.52576327324,92.88371682167053,368
+442800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95571.739, 'num_steps_sampled': 442800, 'update_time_ms': 2.779, 'num_steps_trained': 442800, 'load_time_ms': 0.63, 'default': {'kl': 0.014243930578231812, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.84453010559082, 'total_loss': 14.643656730651855, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12754985690116882, 'vf_explained_var': 0.9877651929855347, 'vf_loss': 14.75678539276123}, 'grad_time_ms': 774.089}",3934253,39025.988913059235,-153.35602109097817,cda-server-6,24,-175.1536698558524,{},8856,10.157.146.6,{},-147.98162832608875,0,1200,2025-08-30_01-27-02,369,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756510022,50.0,442800,39025.988913059235,94.46314978599548,369
+444000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93135.38, 'num_steps_sampled': 444000, 'update_time_ms': 2.732, 'num_steps_trained': 444000, 'load_time_ms': 0.629, 'default': {'kl': 0.014785230159759521, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.96976089477539, 'total_loss': 14.757744789123535, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11156058311462402, 'vf_explained_var': 0.9873138070106506, 'vf_loss': 14.854334831237793}, 'grad_time_ms': 773.513}",3934253,39096.677599191666,-153.33171487671436,cda-server-6,24,-175.1536698558524,{},8880,10.157.146.6,{},-149.2437295888303,0,1200,2025-08-30_01-28-12,370,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756510092,50.0,444000,39096.677599191666,70.68868613243103,370
+445200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94533.882, 'num_steps_sampled': 445200, 'update_time_ms': 2.737, 'num_steps_trained': 445200, 'load_time_ms': 0.635, 'default': {'kl': 0.014057965949177742, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.287542343139648, 'total_loss': 23.995384216308594, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12851697206497192, 'vf_explained_var': 0.9828624725341797, 'vf_loss': 24.10966682434082}, 'grad_time_ms': 766.421}",3934253,39197.56882786751,-153.44320350684313,cda-server-6,24,-171.5362803146453,{},8904,10.157.146.6,{},-143.9455142032621,0,1200,2025-08-30_01-29-53,371,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756510193,50.0,445200,39197.56882786751,100.89122867584229,371
+446400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93615.93, 'num_steps_sampled': 446400, 'update_time_ms': 2.668, 'num_steps_trained': 446400, 'load_time_ms': 0.63, 'default': {'kl': 0.01378762349486351, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.977514266967773, 'total_loss': 16.470462799072266, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12618333101272583, 'vf_explained_var': 0.9869677424430847, 'vf_loss': 16.582687377929688}, 'grad_time_ms': 770.079}",3934253,39288.986879348755,-153.08341630954703,cda-server-6,24,-171.5362803146453,{},8928,10.157.146.6,{},-143.9455142032621,0,1200,2025-08-30_01-31-25,372,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756510285,50.0,446400,39288.986879348755,91.41805148124695,372
+447600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94375.551, 'num_steps_sampled': 447600, 'update_time_ms': 2.64, 'num_steps_trained': 447600, 'load_time_ms': 0.662, 'default': {'kl': 0.013898391276597977, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.216779708862305, 'total_loss': 48.11854934692383, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12654566764831543, 'vf_explained_var': 0.9678885340690613, 'vf_loss': 48.23102569580078}, 'grad_time_ms': 763.214}",3934253,39405.63260102272,-153.18250980534327,cda-server-6,24,-171.5362803146453,{},8952,10.157.146.6,{},-143.9455142032621,0,1200,2025-08-30_01-33-21,373,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756510401,50.0,447600,39405.63260102272,116.64572167396545,373
+448800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94646.345, 'num_steps_sampled': 448800, 'update_time_ms': 2.658, 'num_steps_trained': 448800, 'load_time_ms': 0.673, 'default': {'kl': 0.01273138914257288, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.943889617919922, 'total_loss': 28.784555435180664, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1258401721715927, 'vf_explained_var': 0.977308988571167, 'vf_loss': 28.897504806518555}, 'grad_time_ms': 767.596}",3934253,39495.37490296364,-153.0939093284892,cda-server-6,24,-171.5362803146453,{},8976,10.157.146.6,{},-142.9277414104081,0,1200,2025-08-30_01-34-51,374,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756510491,50.0,448800,39495.37490296364,89.74230194091797,374
+450000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92320.277, 'num_steps_sampled': 450000, 'update_time_ms': 2.638, 'num_steps_trained': 450000, 'load_time_ms': 0.67, 'default': {'kl': 0.012571917846798897, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.955538749694824, 'total_loss': 23.156606674194336, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1303580403327942, 'vf_explained_var': 0.9838725328445435, 'vf_loss': 23.274234771728516}, 'grad_time_ms': 780.454}",3934253,39582.134382009506,-153.18728333636233,cda-server-6,24,-170.6081921394304,{},9000,10.157.146.6,{},-142.9277414104081,0,1200,2025-08-30_01-36-18,375,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756510578,50.0,450000,39582.134382009506,86.75947904586792,375
+451200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92309.316, 'num_steps_sampled': 451200, 'update_time_ms': 2.649, 'num_steps_trained': 451200, 'load_time_ms': 0.65, 'default': {'kl': 0.014042828232049942, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.61319351196289, 'total_loss': 29.14134979248047, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12289823591709137, 'vf_explained_var': 0.9794071912765503, 'vf_loss': 29.25002670288086}, 'grad_time_ms': 779.417}",3934253,39677.111968278885,-153.18059563870236,cda-server-6,24,-174.89906397580594,{},9024,10.157.146.6,{},-142.9277414104081,0,1200,2025-08-30_01-37-53,376,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756510673,50.0,451200,39677.111968278885,94.97758626937866,376
+452400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92738.748, 'num_steps_sampled': 452400, 'update_time_ms': 2.664, 'num_steps_trained': 452400, 'load_time_ms': 0.652, 'default': {'kl': 0.012846022844314575, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.973522186279297, 'total_loss': 34.768245697021484, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1277947723865509, 'vf_explained_var': 0.9744422435760498, 'vf_loss': 34.883033752441406}, 'grad_time_ms': 791.139}",3934253,39774.030656814575,-153.291892610524,cda-server-6,24,-174.89906397580594,{},9048,10.157.146.6,{},-136.53761693354755,0,1200,2025-08-30_01-39-30,377,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756510770,50.0,452400,39774.030656814575,96.91868853569031,377
+453600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 91769.429, 'num_steps_sampled': 453600, 'update_time_ms': 2.585, 'num_steps_trained': 453600, 'load_time_ms': 0.646, 'default': {'kl': 0.015167261473834515, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.005805969238281, 'total_loss': 22.82137680053711, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13752031326293945, 'vf_explained_var': 0.9811097383499146, 'vf_loss': 22.943540573120117}, 'grad_time_ms': 801.995}",3934253,39857.32714128494,-153.35642537200582,cda-server-6,24,-174.89906397580594,{},9072,10.157.146.6,{},-136.53761693354755,0,1200,2025-08-30_01-40-53,378,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756510853,50.0,453600,39857.32714128494,83.29648447036743,378
+454800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93552.532, 'num_steps_sampled': 454800, 'update_time_ms': 2.553, 'num_steps_trained': 454800, 'load_time_ms': 0.644, 'default': {'kl': 0.01317631546407938, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.828235626220703, 'total_loss': 17.909996032714844, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12949572503566742, 'vf_explained_var': 0.9854044318199158, 'vf_loss': 18.02614974975586}, 'grad_time_ms': 788.433}",3934253,39969.48629593849,-153.18384773613363,cda-server-6,24,-174.89906397580594,{},9096,10.157.146.6,{},-136.53761693354755,0,1200,2025-08-30_01-42-45,379,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756510965,50.0,454800,39969.48629593849,112.1591546535492,379
+456000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93344.313, 'num_steps_sampled': 456000, 'update_time_ms': 2.541, 'num_steps_trained': 456000, 'load_time_ms': 0.643, 'default': {'kl': 0.014248888939619064, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.349405288696289, 'total_loss': 17.249818801879883, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1263115406036377, 'vf_explained_var': 0.9867851734161377, 'vf_loss': 17.36170196533203}, 'grad_time_ms': 772.99}",3934253,40037.937469005585,-153.1766155089574,cda-server-6,24,-174.89906397580594,{},9120,10.157.146.6,{},-136.53761693354755,0,1200,2025-08-30_01-43-54,380,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756511034,50.0,456000,40037.937469005585,68.4511730670929,380
+457200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94568.446, 'num_steps_sampled': 457200, 'update_time_ms': 2.599, 'num_steps_trained': 457200, 'load_time_ms': 0.632, 'default': {'kl': 0.014296084642410278, 'cur_lr': 4.999999873689376e-05, 'entropy': 10.027332305908203, 'total_loss': 19.0135555267334, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12623052299022675, 'vf_explained_var': 0.9851264953613281, 'vf_loss': 19.125308990478516}, 'grad_time_ms': 777.1}",3934253,40151.110609054565,-152.8896881821496,cda-server-6,24,-168.73716899846337,{},9144,10.157.146.6,{},-147.7964379594772,0,1200,2025-08-30_01-45-47,381,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756511147,50.0,457200,40151.110609054565,113.17314004898071,381
+458400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92486.574, 'num_steps_sampled': 458400, 'update_time_ms': 2.597, 'num_steps_trained': 458400, 'load_time_ms': 0.638, 'default': {'kl': 0.014583314768970013, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.759105682373047, 'total_loss': 17.389978408813477, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1328737437725067, 'vf_explained_var': 0.9858565926551819, 'vf_loss': 17.508085250854492}, 'grad_time_ms': 785.307}",3934253,40221.79202866554,-152.79855423647666,cda-server-6,24,-164.1867128581947,{},9168,10.157.146.6,{},-136.55946156197663,0,1200,2025-08-30_01-46-58,382,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756511218,50.0,458400,40221.79202866554,70.68141961097717,382
+459600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92935.526, 'num_steps_sampled': 459600, 'update_time_ms': 2.556, 'num_steps_trained': 459600, 'load_time_ms': 0.603, 'default': {'kl': 0.013046178966760635, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.58828067779541, 'total_loss': 29.252241134643555, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13703730702400208, 'vf_explained_var': 0.9791484475135803, 'vf_loss': 29.376068115234375}, 'grad_time_ms': 771.747}",3934253,40342.79056477547,-153.13422255735932,cda-server-6,24,-174.47439067250346,{},9192,10.157.146.6,{},-136.55946156197663,0,1200,2025-08-30_01-48-59,383,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756511339,50.0,459600,40342.79056477547,120.99853610992432,383
+460800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93259.448, 'num_steps_sampled': 460800, 'update_time_ms': 2.518, 'num_steps_trained': 460800, 'load_time_ms': 0.601, 'default': {'kl': 0.014214631170034409, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.635729789733887, 'total_loss': 14.057104110717773, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1296011358499527, 'vf_explained_var': 0.9884568452835083, 'vf_loss': 14.172313690185547}, 'grad_time_ms': 773.144}",3934253,40435.78492999077,-153.0059882991506,cda-server-6,24,-174.47439067250346,{},9216,10.157.146.6,{},-136.55946156197663,0,1200,2025-08-30_01-50-32,384,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756511432,50.0,460800,40435.78492999077,92.99436521530151,384
+462000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94043.3, 'num_steps_sampled': 462000, 'update_time_ms': 2.555, 'num_steps_trained': 462000, 'load_time_ms': 0.603, 'default': {'kl': 0.01449158787727356, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.566226959228516, 'total_loss': 22.213275909423828, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12301838397979736, 'vf_explained_var': 0.9816312789916992, 'vf_loss': 22.321619033813477}, 'grad_time_ms': 765.474}",3934253,40530.30609059334,-152.85925076260227,cda-server-6,24,-174.47439067250346,{},9240,10.157.146.6,{},-136.55946156197663,0,1200,2025-08-30_01-52-06,385,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756511526,50.0,462000,40530.30609059334,94.52116060256958,385
+463200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95899.053, 'num_steps_sampled': 463200, 'update_time_ms': 2.533, 'num_steps_trained': 463200, 'load_time_ms': 0.614, 'default': {'kl': 0.013922227546572685, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.574094772338867, 'total_loss': 23.11071014404297, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.115452341735363, 'vf_explained_var': 0.9839463829994202, 'vf_loss': 23.212068557739258}, 'grad_time_ms': 767.397}",3934253,40643.86023974419,-153.37401042518425,cda-server-6,24,-174.47439067250346,{},9264,10.157.146.6,{},-136.55946156197663,0,1200,2025-08-30_01-54-00,386,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756511640,50.0,463200,40643.86023974419,113.55414915084839,386
+464400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94600.447, 'num_steps_sampled': 464400, 'update_time_ms': 2.467, 'num_steps_trained': 464400, 'load_time_ms': 0.617, 'default': {'kl': 0.013455020263791084, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.93942928314209, 'total_loss': 71.57559204101562, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1256643831729889, 'vf_explained_var': 0.9553078413009644, 'vf_loss': 71.68763732910156}, 'grad_time_ms': 760.01}",3934253,40727.71838593483,-153.81044741787505,cda-server-6,24,-185.61971742619494,{},9288,10.157.146.6,{},-142.7858068921068,0,1200,2025-08-30_01-55-24,387,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756511724,50.0,464400,40727.71838593483,83.85814619064331,387
+465600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95143.577, 'num_steps_sampled': 465600, 'update_time_ms': 2.487, 'num_steps_trained': 465600, 'load_time_ms': 0.628, 'default': {'kl': 0.014179746620357037, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.57951545715332, 'total_loss': 22.397836685180664, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12766240537166595, 'vf_explained_var': 0.9822462797164917, 'vf_loss': 22.51114273071289}, 'grad_time_ms': 761.875}",3934253,40816.4643805027,-153.7662331758303,cda-server-6,24,-185.61971742619494,{},9312,10.157.146.6,{},-141.64657409231407,0,1200,2025-08-30_01-56-53,388,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756511813,50.0,465600,40816.4643805027,88.7459945678711,388
+466800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94702.824, 'num_steps_sampled': 466800, 'update_time_ms': 2.473, 'num_steps_trained': 466800, 'load_time_ms': 0.624, 'default': {'kl': 0.013959686271846294, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.685425758361816, 'total_loss': 20.270750045776367, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13417869806289673, 'vf_explained_var': 0.98442143201828, 'vf_loss': 20.390796661376953}, 'grad_time_ms': 760.271}",3934253,40924.1979534626,-153.9711238325928,cda-server-6,24,-185.61971742619494,{},9336,10.157.146.6,{},-141.64657409231407,0,1200,2025-08-30_01-58-40,389,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756511920,50.0,466800,40924.1979534626,107.7335729598999,389
+468000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 97748.726, 'num_steps_sampled': 468000, 'update_time_ms': 2.538, 'num_steps_trained': 468000, 'load_time_ms': 0.629, 'default': {'kl': 0.0150027209892869, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.611435890197754, 'total_loss': 16.69760513305664, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1160043478012085, 'vf_explained_var': 0.9859545230865479, 'vf_loss': 16.798418045043945}, 'grad_time_ms': 776.435}",3934253,41023.27092075348,-153.2059437076237,cda-server-6,24,-185.61971742619494,{},9360,10.157.146.6,{},-141.64657409231407,0,1200,2025-08-30_02-00-19,390,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756512019,50.0,468000,41023.27092075348,99.0729672908783,390
+469200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 97941.064, 'num_steps_sampled': 469200, 'update_time_ms': 2.545, 'num_steps_trained': 469200, 'load_time_ms': 0.625, 'default': {'kl': 0.01452625822275877, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.65519905090332, 'total_loss': 22.100902557373047, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12013532221317291, 'vf_explained_var': 0.982020378112793, 'vf_loss': 22.206329345703125}, 'grad_time_ms': 765.075}",3934253,41138.254877090454,-152.90577764465885,cda-server-6,24,-170.25628936587407,{},9384,10.157.146.6,{},-141.64657409231407,0,1200,2025-08-30_02-02-14,391,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756512134,50.0,469200,41138.254877090454,114.9839563369751,391
+470400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 100327.709, 'num_steps_sampled': 470400, 'update_time_ms': 2.58, 'num_steps_trained': 470400, 'load_time_ms': 0.627, 'default': {'kl': 0.01367896981537342, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.717622756958008, 'total_loss': 20.730247497558594, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12303749471902847, 'vf_explained_var': 0.9840138554573059, 'vf_loss': 20.839435577392578}, 'grad_time_ms': 740.551}",3934253,41232.55836844444,-152.76562004405554,cda-server-6,24,-169.46345236421746,{},9408,10.157.146.6,{},-146.8892861391005,0,1200,2025-08-30_02-03-49,392,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756512229,50.0,470400,41232.55836844444,94.30349135398865,392
+471600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 98909.243, 'num_steps_sampled': 471600, 'update_time_ms': 2.595, 'num_steps_trained': 471600, 'load_time_ms': 0.63, 'default': {'kl': 0.01300249807536602, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.604305267333984, 'total_loss': 19.531492233276367, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11763381958007812, 'vf_explained_var': 0.9843325018882751, 'vf_loss': 19.635961532592773}, 'grad_time_ms': 758.83}",3934253,41339.5549621582,-152.6976787690023,cda-server-6,24,-169.46345236421746,{},9432,10.157.146.6,{},-142.83068117605868,0,1200,2025-08-30_02-05-36,393,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756512336,50.0,471600,41339.5549621582,106.99659371376038,393
+472800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 98506.519, 'num_steps_sampled': 472800, 'update_time_ms': 2.567, 'num_steps_trained': 472800, 'load_time_ms': 0.624, 'default': {'kl': 0.013971512205898762, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.62321949005127, 'total_loss': 13.953452110290527, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12165407091379166, 'vf_explained_var': 0.9881808757781982, 'vf_loss': 14.06096076965332}, 'grad_time_ms': 755.993}",3934253,41428.493270635605,-152.73886156700593,cda-server-6,24,-169.46345236421746,{},9456,10.157.146.6,{},-142.81457270615553,0,1200,2025-08-30_02-07-05,394,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756512425,50.0,472800,41428.493270635605,88.93830847740173,394
+474000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 97189.632, 'num_steps_sampled': 474000, 'update_time_ms': 2.492, 'num_steps_trained': 474000, 'load_time_ms': 0.628, 'default': {'kl': 0.012882490642368793, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.573514938354492, 'total_loss': 30.2314453125, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11516463756561279, 'vf_explained_var': 0.978480875492096, 'vf_loss': 30.333566665649414}, 'grad_time_ms': 755.275}",3934253,41509.83872747421,-152.55249194049549,cda-server-6,24,-165.33702468179493,{},9480,10.157.146.6,{},-142.81457270615553,0,1200,2025-08-30_02-08-26,395,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756512506,50.0,474000,41509.83872747421,81.34545683860779,395
+475200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 97357.778, 'num_steps_sampled': 475200, 'update_time_ms': 2.494, 'num_steps_trained': 475200, 'load_time_ms': 0.626, 'default': {'kl': 0.015613549388945103, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.676960945129395, 'total_loss': 20.19458770751953, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12485632300376892, 'vf_explained_var': 0.9847643375396729, 'vf_loss': 20.303634643554688}, 'grad_time_ms': 730.936}",3934253,41624.83124899864,-152.67328401582608,cda-server-6,24,-165.33702468179493,{},9504,10.157.146.6,{},-142.81457270615553,0,1200,2025-08-30_02-10-21,396,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756512621,50.0,475200,41624.83124899864,114.99252152442932,396
+476400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 100373.912, 'num_steps_sampled': 476400, 'update_time_ms': 2.536, 'num_steps_trained': 476400, 'load_time_ms': 0.621, 'default': {'kl': 0.014947790652513504, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.738167762756348, 'total_loss': 26.864194869995117, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14435940980911255, 'vf_explained_var': 0.9804407954216003, 'vf_loss': 26.99342155456543}, 'grad_time_ms': 726.922}",3934253,41738.81172847748,-152.73994919692365,cda-server-6,24,-165.33702468179493,{},9528,10.157.146.6,{},-142.81457270615553,0,1200,2025-08-30_02-12-15,397,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756512735,50.0,476400,41738.81172847748,113.98047947883606,397
+477600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 99110.223, 'num_steps_sampled': 477600, 'update_time_ms': 2.541, 'num_steps_trained': 477600, 'load_time_ms': 0.621, 'default': {'kl': 0.014783354476094246, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.937175750732422, 'total_loss': 36.52134323120117, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.125640869140625, 'vf_explained_var': 0.9722763895988464, 'vf_loss': 36.632015228271484}, 'grad_time_ms': 725.375}",3934253,41814.905596494675,-153.10580437943494,cda-server-6,24,-178.8284532302404,{},9552,10.157.146.6,{},-148.82691292199615,0,1200,2025-08-30_02-13-31,398,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756512811,50.0,477600,41814.905596494675,76.09386801719666,398
+478800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 97041.268, 'num_steps_sampled': 478800, 'update_time_ms': 2.524, 'num_steps_trained': 478800, 'load_time_ms': 0.626, 'default': {'kl': 0.01445402018725872, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.523843765258789, 'total_loss': 20.581594467163086, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12159392237663269, 'vf_explained_var': 0.9837309718132019, 'vf_loss': 20.68855094909668}, 'grad_time_ms': 734.791}",3934253,41902.04425191879,-153.46935723412918,cda-server-6,24,-178.8284532302404,{},9576,10.157.146.6,{},-148.82691292199615,0,1200,2025-08-30_02-14-58,399,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756512898,50.0,478800,41902.04425191879,87.13865542411804,399
+480000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 96533.831, 'num_steps_sampled': 480000, 'update_time_ms': 2.47, 'num_steps_trained': 480000, 'load_time_ms': 0.622, 'default': {'kl': 0.01431234646588564, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.579992294311523, 'total_loss': 22.560794830322266, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12019583582878113, 'vf_explained_var': 0.9842327833175659, 'vf_loss': 22.666500091552734}, 'grad_time_ms': 707.123}",3934253,41995.76532769203,-153.08571561344462,cda-server-6,24,-178.8284532302404,{},9600,10.157.146.6,{},-148.82691292199615,0,1200,2025-08-30_02-16-32,400,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756512992,50.0,480000,41995.76532769203,93.72107577323914,400
+481200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95440.939, 'num_steps_sampled': 481200, 'update_time_ms': 2.412, 'num_steps_trained': 481200, 'load_time_ms': 0.627, 'default': {'kl': 0.01310575008392334, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.719040870666504, 'total_loss': 35.705787658691406, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13067464530467987, 'vf_explained_var': 0.9781382083892822, 'vf_loss': 35.82319641113281}, 'grad_time_ms': 720.716}",3934253,42099.95502829552,-153.3988099397184,cda-server-6,24,-178.8284532302404,{},9624,10.157.146.6,{},-145.95915465653817,0,1200,2025-08-30_02-18-16,401,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756513096,50.0,481200,42099.95502829552,104.18970060348511,401
+482400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94308.325, 'num_steps_sampled': 482400, 'update_time_ms': 2.409, 'num_steps_trained': 482400, 'load_time_ms': 0.62, 'default': {'kl': 0.013833809643983841, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.736509323120117, 'total_loss': 35.20651626586914, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1259187012910843, 'vf_explained_var': 0.9735874533653259, 'vf_loss': 35.31842803955078}, 'grad_time_ms': 732.584}",3934253,42183.0499727726,-152.7485907641062,cda-server-6,24,-172.2784810744398,{},9648,10.157.146.6,{},-136.54575402752465,0,1200,2025-08-30_02-19-39,402,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756513179,50.0,482400,42183.0499727726,83.0949444770813,402
+483600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94824.157, 'num_steps_sampled': 483600, 'update_time_ms': 2.383, 'num_steps_trained': 483600, 'load_time_ms': 0.614, 'default': {'kl': 0.012754004448652267, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.690858840942383, 'total_loss': 21.811321258544922, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1276492029428482, 'vf_explained_var': 0.9835841655731201, 'vf_loss': 21.926057815551758}, 'grad_time_ms': 713.208}",3934253,42295.01004576683,-152.46891549800432,cda-server-6,24,-165.81996427857436,{},9672,10.157.146.6,{},-136.54575402752465,0,1200,2025-08-30_02-21-31,403,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756513291,50.0,483600,42295.01004576683,111.96007299423218,403
+484800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 97164.078, 'num_steps_sampled': 484800, 'update_time_ms': 2.413, 'num_steps_trained': 484800, 'load_time_ms': 0.609, 'default': {'kl': 0.014857407659292221, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.539432525634766, 'total_loss': 26.76873207092285, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13504831492900848, 'vf_explained_var': 0.9832797646522522, 'vf_loss': 26.888736724853516}, 'grad_time_ms': 691.336}",3934253,42407.1293554306,-152.53595371969553,cda-server-6,24,-165.81996427857436,{},9696,10.157.146.6,{},-136.54575402752465,0,1200,2025-08-30_02-23-23,404,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756513403,50.0,484800,42407.1293554306,112.11930966377258,404
+486000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 98961.935, 'num_steps_sampled': 486000, 'update_time_ms': 2.452, 'num_steps_trained': 486000, 'load_time_ms': 0.613, 'default': {'kl': 0.012585025280714035, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.700153350830078, 'total_loss': 33.97825622558594, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11734248697757721, 'vf_explained_var': 0.9767182469367981, 'vf_loss': 34.08285903930664}, 'grad_time_ms': 706.634}",3934253,42506.60624504089,-152.02951228995173,cda-server-6,24,-164.54826698210027,{},9720,10.157.146.6,{},-136.54575402752465,0,1200,2025-08-30_02-25-03,405,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756513503,50.0,486000,42506.60624504089,99.47688961029053,405
+487200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 97683.436, 'num_steps_sampled': 487200, 'update_time_ms': 2.47, 'num_steps_trained': 487200, 'load_time_ms': 0.605, 'default': {'kl': 0.012815814465284348, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.51749324798584, 'total_loss': 18.358110427856445, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13039404153823853, 'vf_explained_var': 0.9866368770599365, 'vf_loss': 18.475528717041016}, 'grad_time_ms': 721.45}",3934253,42608.96180129051,-152.5057609427979,cda-server-6,24,-164.98823848315914,{},9744,10.157.146.6,{},-145.99157178352348,0,1200,2025-08-30_02-26-45,406,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756513605,50.0,487200,42608.96180129051,102.35555624961853,406
+488400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 97026.104, 'num_steps_sampled': 488400, 'update_time_ms': 2.467, 'num_steps_trained': 488400, 'load_time_ms': 0.609, 'default': {'kl': 0.013667297549545765, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.660782814025879, 'total_loss': 34.40043258666992, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.124124675989151, 'vf_explained_var': 0.9725708365440369, 'vf_loss': 34.510719299316406}, 'grad_time_ms': 729.874}",3934253,42716.45296001434,-152.4718104965969,cda-server-6,24,-164.98823848315914,{},9768,10.157.146.6,{},-143.33162856010452,0,1200,2025-08-30_02-28-33,407,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756513713,50.0,488400,42716.45296001434,107.49115872383118,407
+489600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 98824.481, 'num_steps_sampled': 489600, 'update_time_ms': 2.472, 'num_steps_trained': 489600, 'load_time_ms': 0.609, 'default': {'kl': 0.013919343240559101, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.630985260009766, 'total_loss': 16.17458152770996, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11430396139621735, 'vf_explained_var': 0.9877437949180603, 'vf_loss': 16.274789810180664}, 'grad_time_ms': 718.413}",3934253,42810.41572546959,-152.49322413360747,cda-server-6,24,-167.22391862857077,{},9792,10.157.146.6,{},-143.33162856010452,0,1200,2025-08-30_02-30-07,408,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756513807,50.0,489600,42810.41572546959,93.96276545524597,408
+490800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 100598.573, 'num_steps_sampled': 490800, 'update_time_ms': 2.456, 'num_steps_trained': 490800, 'load_time_ms': 0.604, 'default': {'kl': 0.013683994300663471, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.548572540283203, 'total_loss': 19.05156898498535, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1258140206336975, 'vf_explained_var': 0.9866318106651306, 'vf_loss': 19.163530349731445}, 'grad_time_ms': 716.211}",3934253,42915.273431539536,-152.54198270127512,cda-server-6,24,-171.02813922101154,{},9816,10.157.146.6,{},-142.7902382364414,0,1200,2025-08-30_02-31-52,409,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756513912,50.0,490800,42915.273431539536,104.85770606994629,409
+492000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 99761.434, 'num_steps_sampled': 492000, 'update_time_ms': 2.489, 'num_steps_trained': 492000, 'load_time_ms': 0.601, 'default': {'kl': 0.013874795287847519, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.748285293579102, 'total_loss': 21.56228256225586, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12434862554073334, 'vf_explained_var': 0.9823559522628784, 'vf_loss': 21.672584533691406}, 'grad_time_ms': 734.541}",3934253,43000.80782318115,-152.27884884345352,cda-server-6,24,-171.02813922101154,{},9840,10.157.146.6,{},-142.7902382364414,0,1200,2025-08-30_02-33-17,410,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756513997,50.0,492000,43000.80782318115,85.53439164161682,410
+493200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 98640.007, 'num_steps_sampled': 493200, 'update_time_ms': 2.536, 'num_steps_trained': 493200, 'load_time_ms': 0.617, 'default': {'kl': 0.013062255457043648, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.480387687683105, 'total_loss': 23.92295265197754, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14083310961723328, 'vf_explained_var': 0.9820898771286011, 'vf_loss': 24.050559997558594}, 'grad_time_ms': 731.141}",3934253,43093.75035619736,-152.388119586282,cda-server-6,24,-171.02813922101154,{},9864,10.157.146.6,{},-142.7902382364414,0,1200,2025-08-30_02-34-50,411,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756514090,50.0,493200,43093.75035619736,92.94253301620483,411
+494400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 100133.017, 'num_steps_sampled': 494400, 'update_time_ms': 2.551, 'num_steps_trained': 494400, 'load_time_ms': 0.62, 'default': {'kl': 0.01390067394822836, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.697103500366211, 'total_loss': 25.10484504699707, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13179221749305725, 'vf_explained_var': 0.9813117980957031, 'vf_loss': 25.22256088256836}, 'grad_time_ms': 732.214}",3934253,43191.7867565155,-152.50038821208054,cda-server-6,24,-171.02813922101154,{},9888,10.157.146.6,{},-142.7902382364414,0,1200,2025-08-30_02-36-28,412,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756514188,50.0,494400,43191.7867565155,98.03640031814575,412
+495600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 101028.588, 'num_steps_sampled': 495600, 'update_time_ms': 2.505, 'num_steps_trained': 495600, 'load_time_ms': 0.622, 'default': {'kl': 0.014487986452877522, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.295341491699219, 'total_loss': 20.39866065979004, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12015184760093689, 'vf_explained_var': 0.9833239316940308, 'vf_loss': 20.5041446685791}, 'grad_time_ms': 747.509}",3934253,43312.855503320694,-152.38164456540886,cda-server-6,24,-167.1559509614097,{},9912,10.157.146.6,{},-135.364826567015,0,1200,2025-08-30_02-38-29,413,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756514309,50.0,495600,43312.855503320694,121.06874680519104,413
+496800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 99359.069, 'num_steps_sampled': 496800, 'update_time_ms': 2.459, 'num_steps_trained': 496800, 'load_time_ms': 0.636, 'default': {'kl': 0.014094003476202488, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.570438385009766, 'total_loss': 25.21484375, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13121233880519867, 'vf_explained_var': 0.9804350733757019, 'vf_loss': 25.331787109375}, 'grad_time_ms': 760.267}",3934253,43408.40692996979,-152.84416168800163,cda-server-6,24,-167.1559509614097,{},9936,10.157.146.6,{},-135.364826567015,0,1200,2025-08-30_02-40-05,414,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756514405,50.0,496800,43408.40692996979,95.55142664909363,414
+498000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 99718.859, 'num_steps_sampled': 498000, 'update_time_ms': 2.471, 'num_steps_trained': 498000, 'load_time_ms': 0.634, 'default': {'kl': 0.013480665162205696, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.567373275756836, 'total_loss': 14.828624725341797, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12480054795742035, 'vf_explained_var': 0.9876997470855713, 'vf_loss': 14.939777374267578}, 'grad_time_ms': 757.832}",3934253,43511.4573700428,-152.41611989014177,cda-server-6,24,-167.1559509614097,{},9960,10.157.146.6,{},-135.364826567015,0,1200,2025-08-30_02-41-48,415,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756514508,50.0,498000,43511.4573700428,103.0504400730133,415
+499200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 99487.673, 'num_steps_sampled': 499200, 'update_time_ms': 2.494, 'num_steps_trained': 499200, 'load_time_ms': 0.671, 'default': {'kl': 0.014436847530305386, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.49398422241211, 'total_loss': 21.52405548095703, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13773185014724731, 'vf_explained_var': 0.9835493564605713, 'vf_loss': 21.64716911315918}, 'grad_time_ms': 762.62}",3934253,43611.55019903183,-152.60514110878364,cda-server-6,24,-166.84073942014268,{},9984,10.157.146.6,{},-135.364826567015,0,1200,2025-08-30_02-43-28,416,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756514608,50.0,499200,43611.55019903183,100.09282898902893,416
+500400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 98961.509, 'num_steps_sampled': 500400, 'update_time_ms': 2.496, 'num_steps_trained': 500400, 'load_time_ms': 0.673, 'default': {'kl': 0.013998076319694519, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.702149391174316, 'total_loss': 27.09682846069336, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14793071150779724, 'vf_explained_var': 0.980864405632019, 'vf_loss': 27.2305850982666}, 'grad_time_ms': 763.849}",3934253,43713.79194974899,-152.75236425338213,cda-server-6,24,-166.84073942014268,{},10008,10.157.146.6,{},-140.077182822348,0,1200,2025-08-30_02-45-10,417,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756514710,50.0,500400,43713.79194974899,102.24175071716309,417
+501600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 99558.243, 'num_steps_sampled': 501600, 'update_time_ms': 2.51, 'num_steps_trained': 501600, 'load_time_ms': 0.663, 'default': {'kl': 0.014370894990861416, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.21036434173584, 'total_loss': 20.671241760253906, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1176142692565918, 'vf_explained_var': 0.9830334782600403, 'vf_loss': 20.77430534362793}, 'grad_time_ms': 759.136}",3934253,43813.67440891266,-152.36210487112976,cda-server-6,24,-166.61907491036374,{},10032,10.157.146.6,{},-140.077182822348,0,1200,2025-08-30_02-46-50,418,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756514810,50.0,501600,43813.67440891266,99.88245916366577,418
+502800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 99253.648, 'num_steps_sampled': 502800, 'update_time_ms': 2.526, 'num_steps_trained': 502800, 'load_time_ms': 0.66, 'default': {'kl': 0.011671670712530613, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.367462158203125, 'total_loss': 23.715415954589844, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12150892615318298, 'vf_explained_var': 0.9807304739952087, 'vf_loss': 23.825103759765625}, 'grad_time_ms': 753.453}",3934253,43915.42871594429,-152.4740142506281,cda-server-6,24,-166.61907491036374,{},10056,10.157.146.6,{},-140.077182822348,0,1200,2025-08-30_02-48-32,419,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756514912,50.0,502800,43915.42871594429,101.75430703163147,419
+504000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 101192.681, 'num_steps_sampled': 504000, 'update_time_ms': 2.561, 'num_steps_trained': 504000, 'load_time_ms': 0.663, 'default': {'kl': 0.013619640842080116, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.553508758544922, 'total_loss': 19.23631477355957, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11064037680625916, 'vf_explained_var': 0.9852237701416016, 'vf_loss': 19.333168029785156}, 'grad_time_ms': 752.56}",3934253,44020.34438610077,-152.088058321042,cda-server-6,24,-166.61907491036374,{},10080,10.157.146.6,{},-140.077182822348,0,1200,2025-08-30_02-50-17,420,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756515017,50.0,504000,44020.34438610077,104.91567015647888,420
+505200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 102467.95, 'num_steps_sampled': 505200, 'update_time_ms': 2.496, 'num_steps_trained': 505200, 'load_time_ms': 0.65, 'default': {'kl': 0.01381174847483635, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.478511810302734, 'total_loss': 16.577302932739258, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1256970316171646, 'vf_explained_var': 0.9875710606575012, 'vf_loss': 16.689016342163086}, 'grad_time_ms': 756.674}",3934253,44126.080137491226,-152.20027245584026,cda-server-6,24,-163.9500105131882,{},10104,10.157.146.6,{},-142.6992763566649,0,1200,2025-08-30_02-52-03,421,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756515123,50.0,505200,44126.080137491226,105.73575139045715,421
+506400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 100725.323, 'num_steps_sampled': 506400, 'update_time_ms': 2.505, 'num_steps_trained': 506400, 'load_time_ms': 0.655, 'default': {'kl': 0.014820229262113571, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.623552322387695, 'total_loss': 15.060821533203125, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12630988657474518, 'vf_explained_var': 0.9873186945915222, 'vf_loss': 15.172125816345215}, 'grad_time_ms': 768.013}",3934253,44206.80386471748,-152.1666959661188,cda-server-6,24,-164.13063243563758,{},10128,10.157.146.6,{},-142.6992763566649,0,1200,2025-08-30_02-53-23,422,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756515203,50.0,506400,44206.80386471748,80.72372722625732,422
+507600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 99141.36, 'num_steps_sampled': 507600, 'update_time_ms': 2.558, 'num_steps_trained': 507600, 'load_time_ms': 0.662, 'default': {'kl': 0.014279918745160103, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.518680572509766, 'total_loss': 20.14760398864746, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12270474433898926, 'vf_explained_var': 0.9837811589241028, 'vf_loss': 20.255849838256836}, 'grad_time_ms': 760.582}",3934253,44311.95928025246,-152.37089182857787,cda-server-6,24,-164.13063243563758,{},10152,10.157.146.6,{},-142.6992763566649,0,1200,2025-08-30_02-55-09,423,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756515309,50.0,507600,44311.95928025246,105.15541553497314,423
+508800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 100358.021, 'num_steps_sampled': 508800, 'update_time_ms': 2.585, 'num_steps_trained': 508800, 'load_time_ms': 0.663, 'default': {'kl': 0.012729505077004433, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.574199676513672, 'total_loss': 24.127349853515625, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12281505018472672, 'vf_explained_var': 0.9814075827598572, 'vf_loss': 24.237276077270508}, 'grad_time_ms': 745.356}",3934253,44419.52580022812,-152.83940788648562,cda-server-6,24,-169.17921882612953,{},10176,10.157.146.6,{},-143.74108753127996,0,1200,2025-08-30_02-56-56,424,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756515416,50.0,508800,44419.52580022812,107.56651997566223,424
+510000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 96418.653, 'num_steps_sampled': 510000, 'update_time_ms': 2.585, 'num_steps_trained': 510000, 'load_time_ms': 0.661, 'default': {'kl': 0.014946307986974716, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.48218822479248, 'total_loss': 16.20340919494629, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14104242622852325, 'vf_explained_var': 0.9878532886505127, 'vf_loss': 16.32931900024414}, 'grad_time_ms': 749.272}",3934253,44483.22181510925,-152.96896037243326,cda-server-6,24,-169.17921882612953,{},10200,10.157.146.6,{},-143.74108753127996,0,1200,2025-08-30_02-58-00,425,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756515480,50.0,510000,44483.22181510925,63.69601488113403,425
+511200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94771.723, 'num_steps_sampled': 511200, 'update_time_ms': 2.541, 'num_steps_trained': 511200, 'load_time_ms': 0.625, 'default': {'kl': 0.013613752089440823, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.491787910461426, 'total_loss': 31.104705810546875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13406051695346832, 'vf_explained_var': 0.9770567417144775, 'vf_loss': 31.22498321533203}, 'grad_time_ms': 752.077}",3934253,44566.871950387955,-153.15485623507504,cda-server-6,24,-176.3212741594545,{},10224,10.157.146.6,{},-142.76878927498908,0,1200,2025-08-30_02-59-24,426,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756515564,50.0,511200,44566.871950387955,83.65013527870178,426
+512400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92743.774, 'num_steps_sampled': 512400, 'update_time_ms': 2.568, 'num_steps_trained': 512400, 'load_time_ms': 0.622, 'default': {'kl': 0.01447269693017006, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.206316947937012, 'total_loss': 16.799468994140625, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12001865357160568, 'vf_explained_var': 0.9866235256195068, 'vf_loss': 16.904834747314453}, 'grad_time_ms': 740.284}",3934253,44648.71591639519,-153.16727095351285,cda-server-6,24,-176.3212741594545,{},10248,10.157.146.6,{},-142.76878927498908,0,1200,2025-08-30_03-00-45,427,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756515645,50.0,512400,44648.71591639519,81.84396600723267,427
+513600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93115.004, 'num_steps_sampled': 513600, 'update_time_ms': 2.549, 'num_steps_trained': 513600, 'load_time_ms': 0.659, 'default': {'kl': 0.015185288153588772, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.132720947265625, 'total_loss': 31.58395004272461, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1356636881828308, 'vf_explained_var': 0.9850590825080872, 'vf_loss': 31.70423698425293}, 'grad_time_ms': 745.147}",3934253,44752.36105489731,-152.47133940099715,cda-server-6,24,-176.3212741594545,{},10272,10.157.146.6,{},-136.65559761541954,0,1200,2025-08-30_03-02-29,428,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756515749,50.0,513600,44752.36105489731,103.64513850212097,428
+514800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93212.02, 'num_steps_sampled': 514800, 'update_time_ms': 2.565, 'num_steps_trained': 514800, 'load_time_ms': 0.659, 'default': {'kl': 0.012950624339282513, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.307674407958984, 'total_loss': 20.67955780029297, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1103539988398552, 'vf_explained_var': 0.983638346195221, 'vf_loss': 20.77680015563965}, 'grad_time_ms': 755.405}",3934253,44855.18939137459,-152.44805865292292,cda-server-6,24,-176.3212741594545,{},10296,10.157.146.6,{},-136.65559761541954,0,1200,2025-08-30_03-04-12,429,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756515852,50.0,514800,44855.18939137459,102.82833647727966,429
+516000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 91066.479, 'num_steps_sampled': 516000, 'update_time_ms': 2.577, 'num_steps_trained': 516000, 'load_time_ms': 0.662, 'default': {'kl': 0.014143170788884163, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.144468307495117, 'total_loss': 20.466529846191406, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12884706258773804, 'vf_explained_var': 0.9840419292449951, 'vf_loss': 20.5810546875}, 'grad_time_ms': 760.555}",3934253,44938.701545238495,-152.37429926190597,cda-server-6,24,-164.15191290750363,{},10320,10.157.146.6,{},-136.65559761541954,0,1200,2025-08-30_03-05-35,430,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756515935,50.0,516000,44938.701545238495,83.51215386390686,430
+517200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 87798.525, 'num_steps_sampled': 517200, 'update_time_ms': 2.592, 'num_steps_trained': 517200, 'load_time_ms': 0.659, 'default': {'kl': 0.012897643260657787, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.147050857543945, 'total_loss': 23.396202087402344, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12415405362844467, 'vf_explained_var': 0.9816821217536926, 'vf_loss': 23.50729751586914}, 'grad_time_ms': 753.518}",3934253,45011.686506032944,-152.49162581204905,cda-server-6,24,-164.15191290750363,{},10344,10.157.146.6,{},-136.65559761541954,0,1200,2025-08-30_03-06-49,431,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756516009,50.0,517200,45011.686506032944,72.98496079444885,431
+518400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 87706.415, 'num_steps_sampled': 518400, 'update_time_ms': 2.562, 'num_steps_trained': 518400, 'load_time_ms': 0.67, 'default': {'kl': 0.01292494498193264, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.16193675994873, 'total_loss': 22.140846252441406, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12487272173166275, 'vf_explained_var': 0.982728123664856, 'vf_loss': 22.252634048461914}, 'grad_time_ms': 750.037}",3934253,45091.453741550446,-152.63505965031305,cda-server-6,24,-164.15191290750363,{},10368,10.157.146.6,{},-136.65559761541954,0,1200,2025-08-30_03-08-08,432,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756516088,50.0,518400,45091.453741550446,79.76723551750183,432
+519600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 86294.257, 'num_steps_sampled': 519600, 'update_time_ms': 2.543, 'num_steps_trained': 519600, 'load_time_ms': 0.697, 'default': {'kl': 0.014303537085652351, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.481611251831055, 'total_loss': 28.225297927856445, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13561999797821045, 'vf_explained_var': 0.9792253971099854, 'vf_loss': 28.346435546875}, 'grad_time_ms': 753.54}",3934253,45182.52351999283,-152.83210305416438,cda-server-6,24,-164.15191290750363,{},10392,10.157.146.6,{},-141.56123354539693,0,1200,2025-08-30_03-09-39,433,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756516179,50.0,519600,45182.52351999283,91.06977844238281,433
+520800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 87916.656, 'num_steps_sampled': 520800, 'update_time_ms': 2.533, 'num_steps_trained': 520800, 'load_time_ms': 0.692, 'default': {'kl': 0.013387994840741158, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.373644828796387, 'total_loss': 35.60469055175781, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1338808387517929, 'vf_explained_var': 0.9731928706169128, 'vf_loss': 35.72500991821289}, 'grad_time_ms': 779.642}",3934253,45306.57466197014,-152.94840318610505,cda-server-6,24,-164.67520643826325,{},10416,10.157.146.6,{},-141.56123354539693,0,1200,2025-08-30_03-11-43,434,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756516303,50.0,520800,45306.57466197014,124.05114197731018,434
+522000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 90730.781, 'num_steps_sampled': 522000, 'update_time_ms': 2.591, 'num_steps_trained': 522000, 'load_time_ms': 0.694, 'default': {'kl': 0.013694589026272297, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.236662864685059, 'total_loss': 25.01688003540039, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12344694137573242, 'vf_explained_var': 0.9816955924034119, 'vf_loss': 25.126461029052734}, 'grad_time_ms': 775.417}",3934253,45398.37049865723,-153.29016551966026,cda-server-6,24,-166.39962864199666,{},10440,10.157.146.6,{},-141.56123354539693,0,1200,2025-08-30_03-13-15,435,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756516395,50.0,522000,45398.37049865723,91.79583668708801,435
+523200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92707.667, 'num_steps_sampled': 523200, 'update_time_ms': 2.606, 'num_steps_trained': 523200, 'load_time_ms': 0.696, 'default': {'kl': 0.015608757734298706, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.019153594970703, 'total_loss': 14.81684684753418, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13161876797676086, 'vf_explained_var': 0.9873616099357605, 'vf_loss': 14.932661056518555}, 'grad_time_ms': 775.044}",3934253,45501.78622722626,-153.08923072182094,cda-server-6,24,-166.39962864199666,{},10464,10.157.146.6,{},-141.56123354539693,0,1200,2025-08-30_03-14-59,436,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756516499,50.0,523200,45501.78622722626,103.41572856903076,436
+524400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93646.448, 'num_steps_sampled': 524400, 'update_time_ms': 2.68, 'num_steps_trained': 524400, 'load_time_ms': 0.696, 'default': {'kl': 0.01425144076347351, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.15731430053711, 'total_loss': 30.321533203125, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1165459007024765, 'vf_explained_var': 0.976507306098938, 'vf_loss': 30.423648834228516}, 'grad_time_ms': 775.914}",3934253,45593.028044462204,-153.07520775521334,cda-server-6,24,-166.39962864199666,{},10488,10.157.146.6,{},-147.22449379964385,0,1200,2025-08-30_03-16-30,437,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756516590,50.0,524400,45593.028044462204,91.24181723594666,437
+525600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 91598.038, 'num_steps_sampled': 525600, 'update_time_ms': 2.7, 'num_steps_trained': 525600, 'load_time_ms': 0.683, 'default': {'kl': 0.014275365509092808, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.111166000366211, 'total_loss': 21.981903076171875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13030636310577393, 'vf_explained_var': 0.9825233817100525, 'vf_loss': 22.097755432128906}, 'grad_time_ms': 775.419}",3934253,45676.183108091354,-152.90180067816212,cda-server-6,24,-167.3678219403425,{},10512,10.157.146.6,{},-142.91137618987028,0,1200,2025-08-30_03-17-53,438,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756516673,50.0,525600,45676.183108091354,83.15506362915039,438
+526800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92189.35, 'num_steps_sampled': 526800, 'update_time_ms': 2.637, 'num_steps_trained': 526800, 'load_time_ms': 0.682, 'default': {'kl': 0.014680023305118084, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.524951934814453, 'total_loss': 16.266441345214844, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12172594666481018, 'vf_explained_var': 0.9866619110107422, 'vf_loss': 16.37330436706543}, 'grad_time_ms': 764.032}",3934253,45784.80823278427,-152.5567861979773,cda-server-6,24,-167.3678219403425,{},10536,10.157.146.6,{},-140.51117984519468,0,1200,2025-08-30_03-19-42,439,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756516782,50.0,526800,45784.80823278427,108.62512469291687,439
+528000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 91884.362, 'num_steps_sampled': 528000, 'update_time_ms': 2.573, 'num_steps_trained': 528000, 'load_time_ms': 0.68, 'default': {'kl': 0.012988438829779625, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.752195358276367, 'total_loss': 14.445332527160645, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12197425961494446, 'vf_explained_var': 0.9885706305503845, 'vf_loss': 14.554155349731445}, 'grad_time_ms': 769.323}",3934253,45865.32222414017,-152.43594000190504,cda-server-6,24,-167.3678219403425,{},10560,10.157.146.6,{},-140.51117984519468,0,1200,2025-08-30_03-21-02,440,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756516862,50.0,528000,45865.32222414017,80.513991355896,440
+529200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92350.812, 'num_steps_sampled': 529200, 'update_time_ms': 2.622, 'num_steps_trained': 529200, 'load_time_ms': 0.68, 'default': {'kl': 0.014562004245817661, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.24899673461914, 'total_loss': 13.435138702392578, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12867429852485657, 'vf_explained_var': 0.9890771508216858, 'vf_loss': 13.549068450927734}, 'grad_time_ms': 775.906}",3934253,45943.03843998909,-152.21569410457278,cda-server-6,24,-167.3678219403425,{},10584,10.157.146.6,{},-140.51117984519468,0,1200,2025-08-30_03-22-20,441,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756516940,50.0,529200,45943.03843998909,77.71621584892273,441
+530400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92189.01, 'num_steps_sampled': 530400, 'update_time_ms': 2.579, 'num_steps_trained': 530400, 'load_time_ms': 0.662, 'default': {'kl': 0.0120732756331563, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.865851402282715, 'total_loss': 20.69765853881836, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11634629219770432, 'vf_explained_var': 0.9846157431602478, 'vf_loss': 20.801780700683594}, 'grad_time_ms': 769.497}",3934253,46021.12298822403,-152.15796697777017,cda-server-6,24,-163.17462601974535,{},10608,10.157.146.6,{},-140.51117984519468,0,1200,2025-08-30_03-23-38,442,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756517018,50.0,530400,46021.12298822403,78.08454823493958,442
+531600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92083.776, 'num_steps_sampled': 531600, 'update_time_ms': 2.534, 'num_steps_trained': 531600, 'load_time_ms': 0.624, 'default': {'kl': 0.012755469419062138, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.95416259765625, 'total_loss': 24.801151275634766, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1164705753326416, 'vf_explained_var': 0.979554295539856, 'vf_loss': 24.904706954956055}, 'grad_time_ms': 776.602}",3934253,46111.20990753174,-152.36619517929225,cda-server-6,24,-165.71090088526697,{},10632,10.157.146.6,{},-149.24255595970118,0,1200,2025-08-30_03-25-08,443,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756517108,50.0,531600,46111.20990753174,90.08691930770874,443
+532800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 88291.327, 'num_steps_sampled': 532800, 'update_time_ms': 2.566, 'num_steps_trained': 532800, 'load_time_ms': 0.622, 'default': {'kl': 0.01175283920019865, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.902250289916992, 'total_loss': 21.50499725341797, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12178131937980652, 'vf_explained_var': 0.9841468334197998, 'vf_loss': 21.614879608154297}, 'grad_time_ms': 754.912}",3934253,46197.119389534,-152.58799610219123,cda-server-6,24,-165.71090088526697,{},10656,10.157.146.6,{},-148.19138459858985,0,1200,2025-08-30_03-26-34,444,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756517194,50.0,532800,46197.119389534,85.9094820022583,444
+534000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 88752.555, 'num_steps_sampled': 534000, 'update_time_ms': 2.491, 'num_steps_trained': 534000, 'load_time_ms': 0.629, 'default': {'kl': 0.014107207767665386, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.85167407989502, 'total_loss': 16.065641403198242, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1293773055076599, 'vf_explained_var': 0.9869747161865234, 'vf_loss': 16.180734634399414}, 'grad_time_ms': 756.331}",3934253,46293.54178571701,-152.4441623789422,cda-server-6,24,-165.71090088526697,{},10680,10.157.146.6,{},-148.19138459858985,0,1200,2025-08-30_03-28-11,445,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756517291,50.0,534000,46293.54178571701,96.42239618301392,445
+535200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 87968.815, 'num_steps_sampled': 535200, 'update_time_ms': 2.489, 'num_steps_trained': 535200, 'load_time_ms': 0.635, 'default': {'kl': 0.014973337762057781, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.602691650390625, 'total_loss': 27.49502182006836, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14575064182281494, 'vf_explained_var': 0.97819584608078, 'vf_loss': 27.6256103515625}, 'grad_time_ms': 764.849}",3934253,46389.205899477005,-152.85829004269831,cda-server-6,24,-166.8271505938669,{},10704,10.157.146.6,{},-139.14836065978687,0,1200,2025-08-30_03-29-46,446,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756517386,50.0,535200,46389.205899477005,95.6641137599945,446
+536400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 87626.363, 'num_steps_sampled': 536400, 'update_time_ms': 2.417, 'num_steps_trained': 536400, 'load_time_ms': 0.643, 'default': {'kl': 0.013655421324074268, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.15749740600586, 'total_loss': 25.52581787109375, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12191127240657806, 'vf_explained_var': 0.9806229472160339, 'vf_loss': 25.63390350341797}, 'grad_time_ms': 763.984}",3934253,46477.0133357048,-152.46147072903958,cda-server-6,24,-166.8271505938669,{},10728,10.157.146.6,{},-139.14836065978687,0,1200,2025-08-30_03-31-14,447,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756517474,50.0,536400,46477.0133357048,87.80743622779846,447
+537600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 89246.139, 'num_steps_sampled': 537600, 'update_time_ms': 2.385, 'num_steps_trained': 537600, 'load_time_ms': 0.627, 'default': {'kl': 0.01547261606901884, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.981388092041016, 'total_loss': 20.75351333618164, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12472319602966309, 'vf_explained_var': 0.9833012819290161, 'vf_loss': 20.86256980895996}, 'grad_time_ms': 766.874}",3934253,46576.39440321922,-152.82945239896245,cda-server-6,24,-169.23393279477395,{},10752,10.157.146.6,{},-135.55991159320467,0,1200,2025-08-30_03-32-54,448,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756517574,50.0,537600,46576.39440321922,99.38106751441956,448
+538800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 86122.555, 'num_steps_sampled': 538800, 'update_time_ms': 2.412, 'num_steps_trained': 538800, 'load_time_ms': 0.634, 'default': {'kl': 0.013270992785692215, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.998639106750488, 'total_loss': 17.40472412109375, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12813928723335266, 'vf_explained_var': 0.9874011278152466, 'vf_loss': 17.519426345825195}, 'grad_time_ms': 779.815}",3934253,46653.91430091858,-152.52047795142636,cda-server-6,24,-169.23393279477395,{},10776,10.157.146.6,{},-135.55991159320467,0,1200,2025-08-30_03-34-11,449,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756517651,50.0,538800,46653.91430091858,77.51989769935608,449
+540000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 84794.443, 'num_steps_sampled': 540000, 'update_time_ms': 2.416, 'num_steps_trained': 540000, 'load_time_ms': 0.635, 'default': {'kl': 0.013216478750109673, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.117257118225098, 'total_loss': 21.370153427124023, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13916881382465363, 'vf_explained_var': 0.9849511384963989, 'vf_loss': 21.49593734741211}, 'grad_time_ms': 779.671}",3934253,46721.145233392715,-152.30260405709933,cda-server-6,24,-169.23393279477395,{},10800,10.157.146.6,{},-135.55991159320467,0,1200,2025-08-30_03-35-18,450,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756517718,50.0,540000,46721.145233392715,67.23093247413635,450
+541200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 86414.3, 'num_steps_sampled': 541200, 'update_time_ms': 2.406, 'num_steps_trained': 541200, 'load_time_ms': 0.637, 'default': {'kl': 0.011747285723686218, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.846776962280273, 'total_loss': 35.44596862792969, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12402357161045074, 'vf_explained_var': 0.97218257188797, 'vf_loss': 35.558101654052734}, 'grad_time_ms': 771.091}",3934253,46814.974937200546,-152.2823230757062,cda-server-6,24,-169.23393279477395,{},10824,10.157.146.6,{},-135.55991159320467,0,1200,2025-08-30_03-36-52,451,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756517812,50.0,541200,46814.974937200546,93.82970380783081,451
+542400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 88969.508, 'num_steps_sampled': 542400, 'update_time_ms': 2.502, 'num_steps_trained': 542400, 'load_time_ms': 0.641, 'default': {'kl': 0.014603732153773308, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.174540519714355, 'total_loss': 17.92264747619629, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1435505747795105, 'vf_explained_var': 0.9859344959259033, 'vf_loss': 18.051414489746094}, 'grad_time_ms': 775.126}",3934253,46918.65322470665,-152.32697364347348,cda-server-6,24,-169.23393279477395,{},10848,10.157.146.6,{},-140.8153902235786,0,1200,2025-08-30_03-38-36,452,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756517916,50.0,542400,46918.65322470665,103.67828750610352,452
+543600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 89585.511, 'num_steps_sampled': 543600, 'update_time_ms': 2.542, 'num_steps_trained': 543600, 'load_time_ms': 0.653, 'default': {'kl': 0.014241022989153862, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.966078758239746, 'total_loss': 21.33045196533203, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12030242383480072, 'vf_explained_var': 0.9839779734611511, 'vf_loss': 21.43633460998535}, 'grad_time_ms': 772.482}",3934253,47014.87502336502,-152.05236692518466,cda-server-6,24,-166.94974357746918,{},10872,10.157.146.6,{},-140.8153902235786,0,1200,2025-08-30_03-40-12,453,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756518012,50.0,543600,47014.87502336502,96.22179865837097,453
+544800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 89867.981, 'num_steps_sampled': 544800, 'update_time_ms': 2.514, 'num_steps_trained': 544800, 'load_time_ms': 0.654, 'default': {'kl': 0.013131268322467804, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.861820220947266, 'total_loss': 20.30666160583496, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1259278655052185, 'vf_explained_var': 0.9839560389518738, 'vf_loss': 20.419294357299805}, 'grad_time_ms': 781.308}",3934253,47103.69718146324,-152.21997083426987,cda-server-6,24,-167.6812075607711,{},10896,10.157.146.6,{},-143.9082405590833,0,1200,2025-08-30_03-41-41,454,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756518101,50.0,544800,47103.69718146324,88.82215809822083,454
+546000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 89019.008, 'num_steps_sampled': 546000, 'update_time_ms': 2.574, 'num_steps_trained': 546000, 'load_time_ms': 0.646, 'default': {'kl': 0.013202676549553871, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.931380271911621, 'total_loss': 20.238691329956055, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12394557893276215, 'vf_explained_var': 0.9842751026153564, 'vf_loss': 20.349267959594727}, 'grad_time_ms': 776.822}",3934253,47191.58376741409,-152.22845068858172,cda-server-6,24,-167.6812075607711,{},10920,10.157.146.6,{},-143.9082405590833,0,1200,2025-08-30_03-43-09,455,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756518189,50.0,546000,47191.58376741409,87.88658595085144,455
+547200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 87597.737, 'num_steps_sampled': 547200, 'update_time_ms': 2.56, 'num_steps_trained': 547200, 'load_time_ms': 0.645, 'default': {'kl': 0.013700922951102257, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.581720352172852, 'total_loss': 15.576580047607422, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11471442133188248, 'vf_explained_var': 0.9878559112548828, 'vf_loss': 15.677420616149902}, 'grad_time_ms': 778.745}",3934253,47273.0536134243,-151.89341081545788,cda-server-6,24,-167.6812075607711,{},10944,10.157.146.6,{},-143.9082405590833,0,1200,2025-08-30_03-44-30,456,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756518270,50.0,547200,47273.0536134243,81.46984601020813,456
+548400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 87216.144, 'num_steps_sampled': 548400, 'update_time_ms': 2.516, 'num_steps_trained': 548400, 'load_time_ms': 0.642, 'default': {'kl': 0.01370406523346901, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.93803596496582, 'total_loss': 12.964447975158691, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13340796530246735, 'vf_explained_var': 0.9900917410850525, 'vf_loss': 13.083980560302734}, 'grad_time_ms': 780.955}",3934253,47357.06767082214,-152.15835333609377,cda-server-6,24,-167.6812075607711,{},10968,10.157.146.6,{},-144.85171769932617,0,1200,2025-08-30_03-45-54,457,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756518354,50.0,548400,47357.06767082214,84.01405739784241,457
+549600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 85739.61, 'num_steps_sampled': 549600, 'update_time_ms': 2.575, 'num_steps_trained': 549600, 'load_time_ms': 0.634, 'default': {'kl': 0.014458566904067993, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.13646411895752, 'total_loss': 24.75263214111328, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1529289335012436, 'vf_explained_var': 0.9856938123703003, 'vf_loss': 24.89092254638672}, 'grad_time_ms': 787.371}",3934253,47441.748109817505,-152.45268015512374,cda-server-6,24,-163.47316165078425,{},10992,10.157.146.6,{},-141.83832716227093,0,1200,2025-08-30_03-47-19,458,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756518439,50.0,549600,47441.748109817505,84.68043899536133,458
+550800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 86245.352, 'num_steps_sampled': 550800, 'update_time_ms': 2.582, 'num_steps_trained': 550800, 'load_time_ms': 0.629, 'default': {'kl': 0.013391264714300632, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.823755264282227, 'total_loss': 14.624773025512695, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13322040438652039, 'vf_explained_var': 0.9883681535720825, 'vf_loss': 14.744434356689453}, 'grad_time_ms': 775.532}",3934253,47524.207596063614,-152.56532788778122,cda-server-6,24,-167.05595446037233,{},11016,10.157.146.6,{},-141.83832716227093,0,1200,2025-08-30_03-48-41,459,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756518521,50.0,550800,47524.207596063614,82.45948624610901,459
+552000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 87682.644, 'num_steps_sampled': 552000, 'update_time_ms': 2.574, 'num_steps_trained': 552000, 'load_time_ms': 0.628, 'default': {'kl': 0.013323888182640076, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.730342864990234, 'total_loss': 11.686019897460938, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12599381804466248, 'vf_explained_var': 0.9910435080528259, 'vf_loss': 11.798521995544434}, 'grad_time_ms': 762.306}",3934253,47605.679342508316,-152.69599782071492,cda-server-6,24,-167.15027913277754,{},11040,10.157.146.6,{},-141.83832716227093,0,1200,2025-08-30_03-50-03,460,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756518603,50.0,552000,47605.679342508316,81.47174644470215,460
+553200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 85658.694, 'num_steps_sampled': 553200, 'update_time_ms': 2.538, 'num_steps_trained': 553200, 'load_time_ms': 0.629, 'default': {'kl': 0.013982264325022697, 'cur_lr': 4.999999873689376e-05, 'entropy': 9.205830574035645, 'total_loss': 15.154325485229492, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14719703793525696, 'vf_explained_var': 0.9879705905914307, 'vf_loss': 15.28736400604248}, 'grad_time_ms': 771.098}",3934253,47679.35620856285,-152.83043235107942,cda-server-6,24,-167.15027913277754,{},11064,10.157.146.6,{},-141.83832716227093,0,1200,2025-08-30_03-51-17,461,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756518677,50.0,553200,47679.35620856285,73.67686605453491,461
+554400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 83275.878, 'num_steps_sampled': 554400, 'update_time_ms': 2.468, 'num_steps_trained': 554400, 'load_time_ms': 0.626, 'default': {'kl': 0.012878802604973316, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.50555419921875, 'total_loss': 23.786239624023438, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10327385365962982, 'vf_explained_var': 0.9826846718788147, 'vf_loss': 23.876474380493164}, 'grad_time_ms': 775.122}",3934253,47759.24594569206,-152.7505983426883,cda-server-6,24,-167.15027913277754,{},11088,10.157.146.6,{},-142.76403805622115,0,1200,2025-08-30_03-52-37,462,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756518757,50.0,554400,47759.24594569206,79.88973712921143,462
+555600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 82794.033, 'num_steps_sampled': 555600, 'update_time_ms': 2.455, 'num_steps_trained': 555600, 'load_time_ms': 0.617, 'default': {'kl': 0.015895912423729897, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.813871383666992, 'total_loss': 14.076557159423828, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13516120612621307, 'vf_explained_var': 0.9894052147865295, 'vf_loss': 14.195623397827148}, 'grad_time_ms': 782.395}",3934253,47850.721262931824,-152.2892577230175,cda-server-6,24,-167.15027913277754,{},11112,10.157.146.6,{},-141.34770473592064,0,1200,2025-08-30_03-54-08,463,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756518848,50.0,555600,47850.721262931824,91.47531723976135,463
+556800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 82909.562, 'num_steps_sampled': 556800, 'update_time_ms': 2.463, 'num_steps_trained': 556800, 'load_time_ms': 0.611, 'default': {'kl': 0.011852155439555645, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.566987991333008, 'total_loss': 18.03278350830078, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12470168620347977, 'vf_explained_var': 0.9847335815429688, 'vf_loss': 18.14548683166504}, 'grad_time_ms': 789.835}",3934253,47940.77296257019,-152.30313201018302,cda-server-6,24,-167.06550295241124,{},11136,10.157.146.6,{},-141.34770473592064,0,1200,2025-08-30_03-55-38,464,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756518938,50.0,556800,47940.77296257019,90.0516996383667,464
+558000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 84216.927, 'num_steps_sampled': 558000, 'update_time_ms': 2.396, 'num_steps_trained': 558000, 'load_time_ms': 0.608, 'default': {'kl': 0.012260083109140396, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.692615509033203, 'total_loss': 18.573740005493164, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11373353004455566, 'vf_explained_var': 0.9856255650520325, 'vf_loss': 18.675060272216797}, 'grad_time_ms': 791.581}",3934253,48041.7510638237,-152.37701671887567,cda-server-6,24,-167.06550295241124,{},11160,10.157.146.6,{},-141.3475296198611,0,1200,2025-08-30_03-57-19,465,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756519039,50.0,558000,48041.7510638237,100.97810125350952,465
+559200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 86656.837, 'num_steps_sampled': 559200, 'update_time_ms': 2.46, 'num_steps_trained': 559200, 'load_time_ms': 0.605, 'default': {'kl': 0.012025618925690651, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.53043270111084, 'total_loss': 22.949411392211914, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.09604374319314957, 'vf_explained_var': 0.9813482761383057, 'vf_loss': 23.03327751159668}, 'grad_time_ms': 780.629}",3934253,48147.51311826706,-152.1334569143758,cda-server-6,24,-165.1671981814466,{},11184,10.157.146.6,{},-141.3475296198611,0,1200,2025-08-30_03-59-05,466,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756519145,50.0,559200,48147.51311826706,105.76205444335938,466
+560400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 84268.461, 'num_steps_sampled': 560400, 'update_time_ms': 2.471, 'num_steps_trained': 560400, 'load_time_ms': 0.596, 'default': {'kl': 0.013515968807041645, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.63956069946289, 'total_loss': 35.26797103881836, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1302550584077835, 'vf_explained_var': 0.9756333827972412, 'vf_loss': 35.38453674316406}, 'grad_time_ms': 772.352}",3934253,48207.5612487793,-152.34913469895858,cda-server-6,24,-165.1671981814466,{},11208,10.157.146.6,{},-141.3475296198611,0,1200,2025-08-30_04-00-05,467,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756519205,50.0,560400,48207.5612487793,60.04813051223755,467
+561600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 86247.813, 'num_steps_sampled': 561600, 'update_time_ms': 2.396, 'num_steps_trained': 561600, 'load_time_ms': 0.616, 'default': {'kl': 0.01517908088862896, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.516483306884766, 'total_loss': 25.675064086914062, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13278795778751373, 'vf_explained_var': 0.9793300628662109, 'vf_loss': 25.79248046875}, 'grad_time_ms': 735.951}",3934253,48311.67094898224,-152.33652022821164,cda-server-6,24,-171.78358159200687,{},11232,10.157.146.6,{},-141.3475296198611,0,1200,2025-08-30_04-01-49,468,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756519309,50.0,561600,48311.67094898224,104.1097002029419,468
+562800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 85308.638, 'num_steps_sampled': 562800, 'update_time_ms': 2.412, 'num_steps_trained': 562800, 'load_time_ms': 0.631, 'default': {'kl': 0.014354195445775986, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.716632843017578, 'total_loss': 18.791744232177734, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13407041132450104, 'vf_explained_var': 0.9842908382415771, 'vf_loss': 18.911279678344727}, 'grad_time_ms': 716.768}",3934253,48384.54709339142,-152.1603091373918,cda-server-6,24,-171.78358159200687,{},11256,10.157.146.6,{},-142.66110503693395,0,1200,2025-08-30_04-03-02,469,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756519382,50.0,562800,48384.54709339142,72.87614440917969,469
+564000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 85557.386, 'num_steps_sampled': 564000, 'update_time_ms': 2.487, 'num_steps_trained': 564000, 'load_time_ms': 0.629, 'default': {'kl': 0.012945041991770267, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.680624008178711, 'total_loss': 14.99290657043457, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1172991693019867, 'vf_explained_var': 0.9875587821006775, 'vf_loss': 15.09709644317627}, 'grad_time_ms': 727.509}",3934253,48468.61529612541,-152.16666849056782,cda-server-6,24,-171.78358159200687,{},11280,10.157.146.6,{},-142.66110503693395,0,1200,2025-08-30_04-04-26,470,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756519466,50.0,564000,48468.61529612541,84.06820273399353,470
+565200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 84829.604, 'num_steps_sampled': 565200, 'update_time_ms': 2.513, 'num_steps_trained': 565200, 'load_time_ms': 0.628, 'default': {'kl': 0.013851411640644073, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.776378631591797, 'total_loss': 18.787683486938477, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12572117149829865, 'vf_explained_var': 0.9839603900909424, 'vf_loss': 18.89937973022461}, 'grad_time_ms': 720.516}",3934253,48534.94466614723,-151.8701079268161,cda-server-6,24,-171.78358159200687,{},11304,10.157.146.6,{},-140.12124004568955,0,1200,2025-08-30_04-05-32,471,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756519532,50.0,565200,48534.94466614723,66.32937002182007,471
+566400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 87558.517, 'num_steps_sampled': 566400, 'update_time_ms': 2.515, 'num_steps_trained': 566400, 'load_time_ms': 0.633, 'default': {'kl': 0.014027898199856281, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.957261085510254, 'total_loss': 13.951068878173828, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12973517179489136, 'vf_explained_var': 0.9884995222091675, 'vf_loss': 14.066600799560547}, 'grad_time_ms': 712.553}",3934253,48642.04382133484,-151.58204971842872,cda-server-6,24,-163.73309523071484,{},11328,10.157.146.6,{},-140.12124004568955,0,1200,2025-08-30_04-07-20,472,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756519640,50.0,566400,48642.04382133484,107.09915518760681,472
+567600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 89974.294, 'num_steps_sampled': 567600, 'update_time_ms': 2.496, 'num_steps_trained': 567600, 'load_time_ms': 0.64, 'default': {'kl': 0.013087683357298374, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.811269760131836, 'total_loss': 18.32082176208496, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12677785754203796, 'vf_explained_var': 0.9860605597496033, 'vf_loss': 18.43434715270996}, 'grad_time_ms': 711.183}",3934253,48757.663786411285,-151.90309347607905,cda-server-6,24,-172.55473715921238,{},11352,10.157.146.6,{},-140.12124004568955,0,1200,2025-08-30_04-09-15,473,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756519755,50.0,567600,48757.663786411285,115.61996507644653,473
+568800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 88965.22, 'num_steps_sampled': 568800, 'update_time_ms': 2.509, 'num_steps_trained': 568800, 'load_time_ms': 0.644, 'default': {'kl': 0.012866493314504623, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.510658264160156, 'total_loss': 26.638233184814453, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12175793200731277, 'vf_explained_var': 0.9810941219329834, 'vf_loss': 26.746965408325195}, 'grad_time_ms': 709.255}",3934253,48837.60676407814,-152.0306529598313,cda-server-6,24,-172.55473715921238,{},11376,10.157.146.6,{},-140.12124004568955,0,1200,2025-08-30_04-10-35,474,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756519835,50.0,568800,48837.60676407814,79.94297766685486,474
+570000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 90873.243, 'num_steps_sampled': 570000, 'update_time_ms': 2.543, 'num_steps_trained': 570000, 'load_time_ms': 0.644, 'default': {'kl': 0.014776766300201416, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.7134370803833, 'total_loss': 16.30389976501465, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12294605374336243, 'vf_explained_var': 0.98764967918396, 'vf_loss': 16.411884307861328}, 'grad_time_ms': 703.627}",3934253,48957.60911512375,-152.09120920384555,cda-server-6,24,-172.55473715921238,{},11400,10.157.146.6,{},-139.91291809163678,0,1200,2025-08-30_04-12-35,475,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756519955,50.0,570000,48957.60911512375,120.00235104560852,475
+571200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 90429.236, 'num_steps_sampled': 571200, 'update_time_ms': 2.448, 'num_steps_trained': 571200, 'load_time_ms': 0.642, 'default': {'kl': 0.01419066358357668, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.54990005493164, 'total_loss': 25.738264083862305, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1428092122077942, 'vf_explained_var': 0.9800757169723511, 'vf_loss': 25.866703033447266}, 'grad_time_ms': 706.412}",3934253,49058.956107616425,-152.22941988879649,cda-server-6,24,-172.55473715921238,{},11424,10.157.146.6,{},-136.4136753827509,0,1200,2025-08-30_04-14-17,476,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756520057,50.0,571200,49058.956107616425,101.34699249267578,476
+572400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93479.099, 'num_steps_sampled': 572400, 'update_time_ms': 2.507, 'num_steps_trained': 572400, 'load_time_ms': 0.642, 'default': {'kl': 0.013474556617438793, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.342805862426758, 'total_loss': 14.449737548828125, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11618823558092117, 'vf_explained_var': 0.9880461096763611, 'vf_loss': 14.55228328704834}, 'grad_time_ms': 705.134}",3934253,49149.48967766762,-151.62238611297823,cda-server-6,24,-164.26034009197124,{},11448,10.157.146.6,{},-136.4136753827509,0,1200,2025-08-30_04-15-47,477,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756520147,50.0,572400,49149.48967766762,90.53357005119324,477
+573600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93793.331, 'num_steps_sampled': 573600, 'update_time_ms': 2.49, 'num_steps_trained': 573600, 'load_time_ms': 0.626, 'default': {'kl': 0.01338463556021452, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.503240585327148, 'total_loss': 9.148031234741211, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13191653788089752, 'vf_explained_var': 0.9921321868896484, 'vf_loss': 9.266396522521973}, 'grad_time_ms': 741.434}",3934253,49257.10363698006,-151.4530620575066,cda-server-6,24,-164.26034009197124,{},11472,10.157.146.6,{},-135.57837804089226,0,1200,2025-08-30_04-17-35,478,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756520255,50.0,573600,49257.10363698006,107.61395931243896,478
+574800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 97189.048, 'num_steps_sampled': 574800, 'update_time_ms': 2.439, 'num_steps_trained': 574800, 'load_time_ms': 0.615, 'default': {'kl': 0.011861172504723072, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.364619255065918, 'total_loss': 19.182300567626953, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10482161492109299, 'vf_explained_var': 0.9860363006591797, 'vf_loss': 19.27511215209961}, 'grad_time_ms': 755.483}",3934253,49364.076297044754,-151.58091026932988,cda-server-6,24,-166.55116997370476,{},11496,10.157.146.6,{},-135.57837804089226,0,1200,2025-08-30_04-19-22,479,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756520362,50.0,574800,49364.076297044754,106.97266006469727,479
+576000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 98683.422, 'num_steps_sampled': 576000, 'update_time_ms': 2.427, 'num_steps_trained': 576000, 'load_time_ms': 0.622, 'default': {'kl': 0.01613686792552471, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.356700897216797, 'total_loss': 24.950077056884766, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12751537561416626, 'vf_explained_var': 0.9812971949577332, 'vf_loss': 25.06125259399414}, 'grad_time_ms': 730.311}",3934253,49462.835492134094,-151.87481690855805,cda-server-6,24,-166.55116997370476,{},11520,10.157.146.6,{},-135.57837804089226,0,1200,2025-08-30_04-21-00,480,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756520460,50.0,576000,49462.835492134094,98.75919508934021,480
+577200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 100784.428, 'num_steps_sampled': 577200, 'update_time_ms': 2.385, 'num_steps_trained': 577200, 'load_time_ms': 0.616, 'default': {'kl': 0.012163571082055569, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.392812728881836, 'total_loss': 25.000938415527344, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12458840012550354, 'vf_explained_var': 0.9825544357299805, 'vf_loss': 25.11321258544922}, 'grad_time_ms': 737.015}",3934253,49550.242958545685,-151.85538016827485,cda-server-6,24,-166.55116997370476,{},11544,10.157.146.6,{},-135.57837804089226,0,1200,2025-08-30_04-22-28,481,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756520548,50.0,577200,49550.242958545685,87.40746641159058,481
+578400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 100695.819, 'num_steps_sampled': 578400, 'update_time_ms': 2.41, 'num_steps_trained': 578400, 'load_time_ms': 0.609, 'default': {'kl': 0.013876695185899734, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.447154998779297, 'total_loss': 19.025917053222656, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12835945188999176, 'vf_explained_var': 0.9851945042610168, 'vf_loss': 19.14022445678711}, 'grad_time_ms': 737.976}",3934253,49656.465804338455,-152.47601961931696,cda-server-6,24,-168.89862092308448,{},11568,10.157.146.6,{},-145.96097054937832,0,1200,2025-08-30_04-24-14,482,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756520654,50.0,578400,49656.465804338455,106.22284579277039,482
+579600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 98965.575, 'num_steps_sampled': 579600, 'update_time_ms': 2.424, 'num_steps_trained': 579600, 'load_time_ms': 0.602, 'default': {'kl': 0.012843552976846695, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.421581268310547, 'total_loss': 28.655893325805664, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1217130795121193, 'vf_explained_var': 0.981257438659668, 'vf_loss': 28.764604568481445}, 'grad_time_ms': 737.339}",3934253,49754.775631427765,-152.4899519121384,cda-server-6,24,-168.89862092308448,{},11592,10.157.146.6,{},-145.96097054937832,0,1200,2025-08-30_04-25-53,483,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756520753,50.0,579600,49754.775631427765,98.30982708930969,483
+580800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 99783.682, 'num_steps_sampled': 580800, 'update_time_ms': 2.383, 'num_steps_trained': 580800, 'load_time_ms': 0.601, 'default': {'kl': 0.01166777778416872, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.177492141723633, 'total_loss': 34.3708610534668, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11253535747528076, 'vf_explained_var': 0.9757702350616455, 'vf_loss': 34.47157669067383}, 'grad_time_ms': 733.529}",3934253,49842.862073898315,-152.56202090243394,cda-server-6,24,-168.89862092308448,{},11616,10.157.146.6,{},-139.9760204444557,0,1200,2025-08-30_04-27-21,484,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756520841,50.0,580800,49842.862073898315,88.08644247055054,484
+582000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 98990.706, 'num_steps_sampled': 582000, 'update_time_ms': 2.384, 'num_steps_trained': 582000, 'load_time_ms': 0.606, 'default': {'kl': 0.013997341506183147, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.29512882232666, 'total_loss': 22.684200286865234, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12494519352912903, 'vf_explained_var': 0.9818886518478394, 'vf_loss': 22.79497528076172}, 'grad_time_ms': 736.962}",3934253,49954.96886229515,-152.57537375459523,cda-server-6,24,-168.89862092308448,{},11640,10.157.146.6,{},-139.9760204444557,0,1200,2025-08-30_04-29-13,485,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756520953,50.0,582000,49954.96886229515,112.10678839683533,485
+583200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 96576.534, 'num_steps_sampled': 583200, 'update_time_ms': 2.437, 'num_steps_trained': 583200, 'load_time_ms': 0.607, 'default': {'kl': 0.013433815911412239, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.316619873046875, 'total_loss': 12.3467378616333, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13029375672340393, 'vf_explained_var': 0.9899523258209229, 'vf_loss': 12.46342945098877}, 'grad_time_ms': 731.844}",3934253,50032.12375879288,-152.43522240052872,cda-server-6,24,-166.16920054641602,{},11664,10.157.146.6,{},-139.9760204444557,0,1200,2025-08-30_04-30-30,486,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756521030,50.0,583200,50032.12375879288,77.15489649772644,486
+584400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 97872.595, 'num_steps_sampled': 584400, 'update_time_ms': 2.376, 'num_steps_trained': 584400, 'load_time_ms': 0.613, 'default': {'kl': 0.01336054690182209, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.06357479095459, 'total_loss': 14.853938102722168, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11478282511234283, 'vf_explained_var': 0.9875580668449402, 'vf_loss': 14.955193519592285}, 'grad_time_ms': 744.769}",3934253,50135.74773335457,-152.2744356165756,cda-server-6,24,-166.16920054641602,{},11688,10.157.146.6,{},-139.9760204444557,0,1200,2025-08-30_04-32-14,487,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756521134,50.0,584400,50135.74773335457,103.62397456169128,487
+585600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93514.198, 'num_steps_sampled': 585600, 'update_time_ms': 2.391, 'num_steps_trained': 585600, 'load_time_ms': 0.623, 'default': {'kl': 0.012713328003883362, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.406123161315918, 'total_loss': 9.236263275146484, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13359849154949188, 'vf_explained_var': 0.9921019673347473, 'vf_loss': 9.356989860534668}, 'grad_time_ms': 744.189}",3934253,50199.77256655693,-151.7673940732708,cda-server-6,24,-163.00463867105913,{},11712,10.157.146.6,{},-141.86458163390066,0,1200,2025-08-30_04-33-18,488,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756521198,50.0,585600,50199.77256655693,64.02483320236206,488
+586800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 90967.011, 'num_steps_sampled': 586800, 'update_time_ms': 2.398, 'num_steps_trained': 586800, 'load_time_ms': 0.616, 'default': {'kl': 0.01239168830215931, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.362218856811523, 'total_loss': 16.686716079711914, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13526107370853424, 'vf_explained_var': 0.9881305694580078, 'vf_loss': 16.809431076049805}, 'grad_time_ms': 760.819}",3934253,50281.43963265419,-151.5349348740325,cda-server-6,24,-162.58932785547924,{},11736,10.157.146.6,{},-141.86458163390066,0,1200,2025-08-30_04-34-39,489,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756521279,50.0,586800,50281.43963265419,81.66706609725952,489
+588000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92896.958, 'num_steps_sampled': 588000, 'update_time_ms': 2.35, 'num_steps_trained': 588000, 'load_time_ms': 0.613, 'default': {'kl': 0.012361129745841026, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.23472785949707, 'total_loss': 17.43859100341797, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10937649011611938, 'vf_explained_var': 0.9870246052742004, 'vf_loss': 17.53545379638672}, 'grad_time_ms': 782.39}",3934253,50399.713121175766,-151.58134547998327,cda-server-6,24,-166.551374223564,{},11760,10.157.146.6,{},-141.86458163390066,0,1200,2025-08-30_04-36-38,490,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756521398,50.0,588000,50399.713121175766,118.27348852157593,490
+589200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92667.249, 'num_steps_sampled': 589200, 'update_time_ms': 2.376, 'num_steps_trained': 589200, 'load_time_ms': 0.62, 'default': {'kl': 0.013915492221713066, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.18729019165039, 'total_loss': 21.70315170288086, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11174440383911133, 'vf_explained_var': 0.9853160381317139, 'vf_loss': 21.80080795288086}, 'grad_time_ms': 755.628}",3934253,50484.555617809296,-151.50221318432668,cda-server-6,24,-166.551374223564,{},11784,10.157.146.6,{},-141.86458163390066,0,1200,2025-08-30_04-38-02,491,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756521482,50.0,589200,50484.555617809296,84.84249663352966,491
+590400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92900.83, 'num_steps_sampled': 590400, 'update_time_ms': 2.378, 'num_steps_trained': 590400, 'load_time_ms': 0.625, 'default': {'kl': 0.014017928391695023, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.306546211242676, 'total_loss': 17.22860336303711, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13493818044662476, 'vf_explained_var': 0.9866151809692383, 'vf_loss': 17.349348068237305}, 'grad_time_ms': 753.323}",3934253,50593.09105873108,-151.9797859047839,cda-server-6,24,-166.551374223564,{},11808,10.157.146.6,{},-142.82675790269593,0,1200,2025-08-30_04-39-51,492,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756521591,50.0,590400,50593.09105873108,108.53544092178345,492
+591600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 91710.711, 'num_steps_sampled': 591600, 'update_time_ms': 2.399, 'num_steps_trained': 591600, 'load_time_ms': 0.626, 'default': {'kl': 0.014049972407519817, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.360414505004883, 'total_loss': 17.243886947631836, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13506677746772766, 'vf_explained_var': 0.9864630103111267, 'vf_loss': 17.364728927612305}, 'grad_time_ms': 739.928}",3934253,50679.36665248871,-152.28057967114154,cda-server-6,24,-166.551374223564,{},11832,10.157.146.6,{},-142.94920052597337,0,1200,2025-08-30_04-41-17,493,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756521677,50.0,591600,50679.36665248871,86.2755937576294,493
+592800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94009.661, 'num_steps_sampled': 592800, 'update_time_ms': 2.403, 'num_steps_trained': 592800, 'load_time_ms': 0.626, 'default': {'kl': 0.012254327535629272, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.261372566223145, 'total_loss': 24.526485443115234, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11333407461643219, 'vf_explained_var': 0.98112553358078, 'vf_loss': 24.627412796020508}, 'grad_time_ms': 745.345}",3934253,50790.49547314644,-152.2664411603055,cda-server-6,24,-165.55358103574406,{},11856,10.157.146.6,{},-142.94920052597337,0,1200,2025-08-30_04-43-08,494,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756521788,50.0,592800,50790.49547314644,111.1288206577301,494
+594000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93719.151, 'num_steps_sampled': 594000, 'update_time_ms': 2.353, 'num_steps_trained': 594000, 'load_time_ms': 0.623, 'default': {'kl': 0.010306322015821934, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.569117546081543, 'total_loss': 38.06904983520508, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12895271182060242, 'vf_explained_var': 0.9726418256759644, 'vf_loss': 38.18756866455078}, 'grad_time_ms': 743.518}",3934253,50899.678308963776,-152.4053046281917,cda-server-6,24,-179.23954249428897,{},11880,10.157.146.6,{},-142.94920052597337,0,1200,2025-08-30_04-44-58,495,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756521898,50.0,594000,50899.678308963776,109.18283581733704,495
+595200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95340.754, 'num_steps_sampled': 595200, 'update_time_ms': 2.369, 'num_steps_trained': 595200, 'load_time_ms': 0.622, 'default': {'kl': 0.014301293529570103, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.337230682373047, 'total_loss': 23.737653732299805, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11855091154575348, 'vf_explained_var': 0.9828669428825378, 'vf_loss': 23.841726303100586}, 'grad_time_ms': 753.814}",3934253,50993.152535676956,-152.48441120424198,cda-server-6,24,-179.23954249428897,{},11904,10.157.146.6,{},-143.24911084280703,0,1200,2025-08-30_04-46-31,496,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756521991,50.0,595200,50993.152535676956,93.47422671318054,496
+596400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93168.107, 'num_steps_sampled': 596400, 'update_time_ms': 2.414, 'num_steps_trained': 596400, 'load_time_ms': 0.622, 'default': {'kl': 0.011234988458454609, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.054550170898438, 'total_loss': 32.349212646484375, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10083127021789551, 'vf_explained_var': 0.9761930108070374, 'vf_loss': 32.43867111206055}, 'grad_time_ms': 745.001}",3934253,51074.96179127693,-152.2923890436924,cda-server-6,24,-179.23954249428897,{},11928,10.157.146.6,{},-143.24911084280703,0,1200,2025-08-30_04-47-53,497,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756522073,50.0,596400,51074.96179127693,81.80925559997559,497
+597600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 96494.6, 'num_steps_sampled': 597600, 'update_time_ms': 2.411, 'num_steps_trained': 597600, 'load_time_ms': 0.606, 'default': {'kl': 0.013591241091489792, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.476066589355469, 'total_loss': 22.050931930541992, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13895396888256073, 'vf_explained_var': 0.9850466847419739, 'vf_loss': 22.176122665405273}, 'grad_time_ms': 745.967}",3934253,51172.260909318924,-152.56113232132276,cda-server-6,24,-179.23954249428897,{},11952,10.157.146.6,{},-143.24911084280703,0,1200,2025-08-30_04-49-30,498,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756522170,50.0,597600,51172.260909318924,97.29911804199219,498
+598800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 97191.043, 'num_steps_sampled': 598800, 'update_time_ms': 2.462, 'num_steps_trained': 598800, 'load_time_ms': 0.607, 'default': {'kl': 0.01398612093180418, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.27603530883789, 'total_loss': 11.802041053771973, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1316160410642624, 'vf_explained_var': 0.9910869598388672, 'vf_loss': 11.919496536254883}, 'grad_time_ms': 742.414}",3934253,51260.85743522644,-152.5133723821843,cda-server-6,24,-179.23954249428897,{},11976,10.157.146.6,{},-141.76588621311848,0,1200,2025-08-30_04-50-59,499,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756522259,50.0,598800,51260.85743522644,88.59652590751648,499
+600000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95862.436, 'num_steps_sampled': 600000, 'update_time_ms': 2.474, 'num_steps_trained': 600000, 'load_time_ms': 0.618, 'default': {'kl': 0.01472895685583353, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.435802459716797, 'total_loss': 18.317811965942383, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13494382798671722, 'vf_explained_var': 0.987172544002533, 'vf_loss': 18.437843322753906}, 'grad_time_ms': 734.813}",3934253,51365.77009224892,-152.01744841803412,cda-server-6,24,-168.45596695942382,{},12000,10.157.146.6,{},-141.76588621311848,0,1200,2025-08-30_04-52-44,500,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756522364,50.0,600000,51365.77009224892,104.9126570224762,500
+601200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95968.686, 'num_steps_sampled': 601200, 'update_time_ms': 2.619, 'num_steps_trained': 601200, 'load_time_ms': 0.609, 'default': {'kl': 0.012464533559978008, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.986090660095215, 'total_loss': 15.645466804504395, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12583625316619873, 'vf_explained_var': 0.9872433543205261, 'vf_loss': 15.758684158325195}, 'grad_time_ms': 736.879}",3934253,51451.69588470459,-152.01037099071374,cda-server-6,24,-168.45596695942382,{},12024,10.157.146.6,{},-141.76588621311848,0,1200,2025-08-30_04-54-10,501,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756522450,50.0,601200,51451.69588470459,85.92579245567322,501
+602400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93102.896, 'num_steps_sampled': 602400, 'update_time_ms': 2.611, 'num_steps_trained': 602400, 'load_time_ms': 0.613, 'default': {'kl': 0.012951488606631756, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.401609420776367, 'total_loss': 14.38691520690918, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11934472620487213, 'vf_explained_var': 0.9900305867195129, 'vf_loss': 14.493144989013672}, 'grad_time_ms': 738.486}",3934253,51531.589405059814,-151.48011399393036,cda-server-6,24,-164.17799719138918,{},12048,10.157.146.6,{},-141.76588621311848,0,1200,2025-08-30_04-55-30,502,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756522530,50.0,602400,51531.589405059814,79.89352035522461,502
+603600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92845.764, 'num_steps_sampled': 603600, 'update_time_ms': 2.592, 'num_steps_trained': 603600, 'load_time_ms': 0.611, 'default': {'kl': 0.012631156481802464, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.266646385192871, 'total_loss': 20.765531539916992, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13049106299877167, 'vf_explained_var': 0.9859540462493896, 'vf_loss': 20.88323402404785}, 'grad_time_ms': 749.962}",3934253,51615.408281326294,-151.95040094615155,cda-server-6,24,-164.17799719138918,{},12072,10.157.146.6,{},-142.91358491840785,0,1200,2025-08-30_04-56-53,503,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756522613,50.0,603600,51615.408281326294,83.81887626647949,503
+604800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 91623.159, 'num_steps_sampled': 604800, 'update_time_ms': 2.684, 'num_steps_trained': 604800, 'load_time_ms': 0.615, 'default': {'kl': 0.0131832305341959, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.261452674865723, 'total_loss': 21.291887283325195, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13001152873039246, 'vf_explained_var': 0.9833104610443115, 'vf_loss': 21.408550262451172}, 'grad_time_ms': 748.329}",3934253,51714.29527378082,-152.11592553945118,cda-server-6,24,-169.47437276213114,{},12096,10.157.146.6,{},-142.91358491840785,0,1200,2025-08-30_04-58-32,504,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756522712,50.0,604800,51714.29527378082,98.88699245452881,504
+606000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 91136.225, 'num_steps_sampled': 606000, 'update_time_ms': 2.73, 'num_steps_trained': 606000, 'load_time_ms': 0.622, 'default': {'kl': 0.012735579162836075, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.163020133972168, 'total_loss': 32.51218795776367, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12349916994571686, 'vf_explained_var': 0.9749259948730469, 'vf_loss': 32.62278747558594}, 'grad_time_ms': 744.669}",3934253,51818.57286596298,-152.21681742415493,cda-server-6,24,-174.96017940841094,{},12120,10.157.146.6,{},-142.91358491840785,0,1200,2025-08-30_05-00-17,505,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756522817,50.0,606000,51818.57286596298,104.27759218215942,505
+607200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 91654.218, 'num_steps_sampled': 607200, 'update_time_ms': 2.733, 'num_steps_trained': 607200, 'load_time_ms': 0.629, 'default': {'kl': 0.010551582090556622, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.147479057312012, 'total_loss': 24.14088249206543, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1115213930606842, 'vf_explained_var': 0.9835327863693237, 'vf_loss': 24.241722106933594}, 'grad_time_ms': 730.711}",3934253,51917.086246967316,-152.5065032769895,cda-server-6,24,-174.96017940841094,{},12144,10.157.146.6,{},-145.65283452681913,0,1200,2025-08-30_05-01-55,506,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756522915,50.0,607200,51917.086246967316,98.5133810043335,506
+608400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92937.584, 'num_steps_sampled': 608400, 'update_time_ms': 2.651, 'num_steps_trained': 608400, 'load_time_ms': 0.627, 'default': {'kl': 0.013679493218660355, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.322196006774902, 'total_loss': 17.807706832885742, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13080990314483643, 'vf_explained_var': 0.989324152469635, 'vf_loss': 17.924665451049805}, 'grad_time_ms': 723.807}",3934253,52011.65894627571,-152.62199344445517,cda-server-6,24,-174.96017940841094,{},12168,10.157.146.6,{},-145.65283452681913,0,1200,2025-08-30_05-03-30,507,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756523010,50.0,608400,52011.65894627571,94.57269930839539,507
+609600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94056.282, 'num_steps_sampled': 609600, 'update_time_ms': 2.661, 'num_steps_trained': 609600, 'load_time_ms': 0.636, 'default': {'kl': 0.01215057447552681, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.190593719482422, 'total_loss': 19.476789474487305, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1280270367860794, 'vf_explained_var': 0.9847090840339661, 'vf_loss': 19.592514038085938}, 'grad_time_ms': 700.216}",3934253,52119.90881872177,-152.45247066530314,cda-server-6,24,-174.96017940841094,{},12192,10.157.146.6,{},-145.65283452681913,0,1200,2025-08-30_05-05-18,508,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756523118,50.0,609600,52119.90881872177,108.24987244606018,508
+610800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95930.862, 'num_steps_sampled': 610800, 'update_time_ms': 2.656, 'num_steps_trained': 610800, 'load_time_ms': 0.642, 'default': {'kl': 0.012087873183190823, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.145371437072754, 'total_loss': 15.570717811584473, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10609038174152374, 'vf_explained_var': 0.986748456954956, 'vf_loss': 15.664569854736328}, 'grad_time_ms': 692.213}",3934253,52227.17141199112,-151.90335354787902,cda-server-6,24,-166.31865727551698,{},12216,10.157.146.6,{},-137.2146438832549,0,1200,2025-08-30_05-07-05,509,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756523225,50.0,610800,52227.17141199112,107.26259326934814,509
+612000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95583.025, 'num_steps_sampled': 612000, 'update_time_ms': 2.643, 'num_steps_trained': 612000, 'load_time_ms': 0.629, 'default': {'kl': 0.013264824636280537, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.246790885925293, 'total_loss': 45.29621505737305, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12984015047550201, 'vf_explained_var': 0.9686688184738159, 'vf_loss': 45.41261672973633}, 'grad_time_ms': 703.069}",3934253,52328.713398218155,-151.84296176241773,cda-server-6,24,-176.07474622271582,{},12240,10.157.146.6,{},-137.2146438832549,0,1200,2025-08-30_05-08-47,510,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756523327,50.0,612000,52328.713398218155,101.54198622703552,510
+613200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 97924.306, 'num_steps_sampled': 613200, 'update_time_ms': 2.498, 'num_steps_trained': 613200, 'load_time_ms': 0.628, 'default': {'kl': 0.01422956120222807, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.146068572998047, 'total_loss': 11.832422256469727, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13532468676567078, 'vf_explained_var': 0.99064040184021, 'vf_loss': 11.953340530395508}, 'grad_time_ms': 719.987}",3934253,52438.22039580345,-151.5841077330452,cda-server-6,24,-176.07474622271582,{},12264,10.157.146.6,{},-137.2146438832549,0,1200,2025-08-30_05-10-36,511,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756523436,50.0,613200,52438.22039580345,109.50699758529663,511
+614400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 102984.168, 'num_steps_sampled': 614400, 'update_time_ms': 2.622, 'num_steps_trained': 614400, 'load_time_ms': 0.65, 'default': {'kl': 0.01368715986609459, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.12667465209961, 'total_loss': 10.103468894958496, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12915891408920288, 'vf_explained_var': 0.9917065501213074, 'vf_loss': 10.218769073486328}, 'grad_time_ms': 721.669}",3934253,52568.73124575615,-151.36754937198694,cda-server-6,24,-176.07474622271582,{},12288,10.157.146.6,{},-135.6766711022273,0,1200,2025-08-30_05-12-47,512,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756523567,50.0,614400,52568.73124575615,130.51084995269775,512
+615600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 102785.433, 'num_steps_sampled': 615600, 'update_time_ms': 2.633, 'num_steps_trained': 615600, 'load_time_ms': 0.653, 'default': {'kl': 0.01425766758620739, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.303292274475098, 'total_loss': 15.37277889251709, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12664847075939178, 'vf_explained_var': 0.9880255460739136, 'vf_loss': 15.484992027282715}, 'grad_time_ms': 707.702}",3934253,52650.423523426056,-152.04099381919596,cda-server-6,24,-176.07474622271582,{},12312,10.157.146.6,{},-135.6766711022273,0,1200,2025-08-30_05-14-09,513,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756523649,50.0,615600,52650.423523426056,81.69227766990662,513
+616800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 100567.557, 'num_steps_sampled': 616800, 'update_time_ms': 2.548, 'num_steps_trained': 616800, 'load_time_ms': 0.652, 'default': {'kl': 0.014286945573985577, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.151420593261719, 'total_loss': 13.630146026611328, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13689583539962769, 'vf_explained_var': 0.9897435307502747, 'vf_loss': 13.752577781677246}, 'grad_time_ms': 716.56}",3934253,52727.22015619278,-152.27760701819744,cda-server-6,24,-170.78339176081246,{},12336,10.157.146.6,{},-135.6766711022273,0,1200,2025-08-30_05-15-25,514,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756523725,50.0,616800,52727.22015619278,76.79663276672363,514
+618000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 99823.146, 'num_steps_sampled': 618000, 'update_time_ms': 2.531, 'num_steps_trained': 618000, 'load_time_ms': 0.676, 'default': {'kl': 0.012816226109862328, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.943474769592285, 'total_loss': 11.330946922302246, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12894707918167114, 'vf_explained_var': 0.991180956363678, 'vf_loss': 11.446918487548828}, 'grad_time_ms': 717.567}",3934253,52824.06447529793,-152.1601072845309,cda-server-6,24,-167.464972589186,{},12360,10.157.146.6,{},-135.6766711022273,0,1200,2025-08-30_05-17-02,515,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756523822,50.0,618000,52824.06447529793,96.84431910514832,515
+619200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 96189.219, 'num_steps_sampled': 619200, 'update_time_ms': 2.517, 'num_steps_trained': 619200, 'load_time_ms': 0.669, 'default': {'kl': 0.012791362591087818, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.24141788482666, 'total_loss': 26.494110107421875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10487866401672363, 'vf_explained_var': 0.9813768863677979, 'vf_loss': 26.58603858947754}, 'grad_time_ms': 727.341}",3934253,52886.338240385056,-152.80565974195426,cda-server-6,24,-188.88444817631853,{},12384,10.157.146.6,{},-142.63657027284142,0,1200,2025-08-30_05-18-05,516,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756523885,50.0,619200,52886.338240385056,62.273765087127686,516
+620400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 98201.82, 'num_steps_sampled': 620400, 'update_time_ms': 2.59, 'num_steps_trained': 620400, 'load_time_ms': 0.669, 'default': {'kl': 0.014150070026516914, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.084158897399902, 'total_loss': 23.739412307739258, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11772307753562927, 'vf_explained_var': 0.9816194176673889, 'vf_loss': 23.84280776977539}, 'grad_time_ms': 744.37}",3934253,53001.20790696144,-152.8964755557505,cda-server-6,24,-188.88444817631853,{},12408,10.157.146.6,{},-142.63657027284142,0,1200,2025-08-30_05-19-59,517,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756523999,50.0,620400,53001.20790696144,114.8696665763855,517
+621600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 98627.304, 'num_steps_sampled': 621600, 'update_time_ms': 2.552, 'num_steps_trained': 621600, 'load_time_ms': 0.662, 'default': {'kl': 0.013758014887571335, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.227802276611328, 'total_loss': 17.798114776611328, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13919270038604736, 'vf_explained_var': 0.9861811399459839, 'vf_loss': 17.923377990722656}, 'grad_time_ms': 759.585}",3934253,53113.86532020569,-152.3823699226433,cda-server-6,24,-188.88444817631853,{},12432,10.157.146.6,{},-142.97483859918682,0,1200,2025-08-30_05-21-52,518,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756524112,50.0,621600,53113.86532020569,112.65741324424744,518
+622800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 96507.54, 'num_steps_sampled': 622800, 'update_time_ms': 2.547, 'num_steps_trained': 622800, 'load_time_ms': 0.662, 'default': {'kl': 0.014047209173440933, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.386871337890625, 'total_loss': 11.968228340148926, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12979204952716827, 'vf_explained_var': 0.9908723831176758, 'vf_loss': 12.083797454833984}, 'grad_time_ms': 772.976}",3934253,53200.06404042244,-152.4626276036031,cda-server-6,24,-188.88444817631853,{},12456,10.157.146.6,{},-142.3687594400822,0,1200,2025-08-30_05-23-18,519,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756524198,50.0,622800,53200.06404042244,86.1987202167511,519
+624000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 97227.837, 'num_steps_sampled': 624000, 'update_time_ms': 2.585, 'num_steps_trained': 624000, 'load_time_ms': 0.663, 'default': {'kl': 0.012714684940874577, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.912516117095947, 'total_loss': 10.766222953796387, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1207198053598404, 'vf_explained_var': 0.9909854531288147, 'vf_loss': 10.874068260192871}, 'grad_time_ms': 766.835}",3934253,53308.747881650925,-151.79671619332342,cda-server-6,24,-167.87898398359434,{},12480,10.157.146.6,{},-141.7748335402267,0,1200,2025-08-30_05-25-07,520,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756524307,50.0,624000,53308.747881650925,108.68384122848511,520
+625200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94634.396, 'num_steps_sampled': 625200, 'update_time_ms': 2.608, 'num_steps_trained': 625200, 'load_time_ms': 0.668, 'default': {'kl': 0.012906880117952824, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.208747863769531, 'total_loss': 11.932040214538574, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12314458936452866, 'vf_explained_var': 0.990728497505188, 'vf_loss': 12.042116165161133}, 'grad_time_ms': 767.734}",3934253,53392.32917332649,-151.6300232368595,cda-server-6,24,-167.87898398359434,{},12504,10.157.146.6,{},-141.7748335402267,0,1200,2025-08-30_05-26-31,521,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756524391,50.0,625200,53392.32917332649,83.58129167556763,521
+626400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92912.862, 'num_steps_sampled': 626400, 'update_time_ms': 2.461, 'num_steps_trained': 626400, 'load_time_ms': 0.64, 'default': {'kl': 0.01441223919391632, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.211857795715332, 'total_loss': 14.67701530456543, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12073574960231781, 'vf_explained_var': 0.9877294898033142, 'vf_loss': 14.783159255981445}, 'grad_time_ms': 766.158}",3934253,53505.60668492317,-151.439543385995,cda-server-6,24,-167.87898398359434,{},12528,10.157.146.6,{},-139.45819028197874,0,1200,2025-08-30_05-28-24,522,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756524504,50.0,626400,53505.60668492317,113.27751159667969,522
+627600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93068.389, 'num_steps_sampled': 627600, 'update_time_ms': 2.534, 'num_steps_trained': 627600, 'load_time_ms': 0.644, 'default': {'kl': 0.011968232691287994, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.311710357666016, 'total_loss': 15.748764038085938, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12205375730991364, 'vf_explained_var': 0.9866352081298828, 'vf_loss': 15.858699798583984}, 'grad_time_ms': 786.6}",3934253,53589.059653282166,-151.3334212564389,cda-server-6,24,-167.87898398359434,{},12552,10.157.146.6,{},-139.45819028197874,0,1200,2025-08-30_05-29-47,523,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756524587,50.0,627600,53589.059653282166,83.45296835899353,523
+628800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95102.69, 'num_steps_sampled': 628800, 'update_time_ms': 2.546, 'num_steps_trained': 628800, 'load_time_ms': 0.646, 'default': {'kl': 0.011504167690873146, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.257884979248047, 'total_loss': 20.620460510253906, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11923030763864517, 'vf_explained_var': 0.9844631552696228, 'vf_loss': 20.72804069519043}, 'grad_time_ms': 778.226}",3934253,53686.11592555046,-151.36060793239966,cda-server-6,24,-162.56349252872974,{},12576,10.157.146.6,{},-139.45819028197874,0,1200,2025-08-30_05-31-25,524,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756524685,50.0,628800,53686.11592555046,97.05627226829529,524
+630000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95618.693, 'num_steps_sampled': 630000, 'update_time_ms': 2.56, 'num_steps_trained': 630000, 'load_time_ms': 0.616, 'default': {'kl': 0.012765922583639622, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.11794662475586, 'total_loss': 11.24935531616211, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1303068846464157, 'vf_explained_var': 0.9913408756256104, 'vf_loss': 11.36673641204834}, 'grad_time_ms': 785.336}",3934253,53788.19004368782,-151.2742824012452,cda-server-6,24,-162.56349252872974,{},12600,10.157.146.6,{},-139.45819028197874,0,1200,2025-08-30_05-33-07,525,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756524787,50.0,630000,53788.19004368782,102.07411813735962,525
+631200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 98916.172, 'num_steps_sampled': 631200, 'update_time_ms': 2.524, 'num_steps_trained': 631200, 'load_time_ms': 0.615, 'default': {'kl': 0.014033918268978596, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.2867431640625, 'total_loss': 21.977487564086914, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12659567594528198, 'vf_explained_var': 0.9823175072669983, 'vf_loss': 22.089872360229492}, 'grad_time_ms': 773.845}",3934253,53883.322149038315,-151.7377933753082,cda-server-6,24,-166.93235202604248,{},12624,10.157.146.6,{},-140.14347767908308,0,1200,2025-08-30_05-34-42,526,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756524882,50.0,631200,53883.322149038315,95.13210535049438,526
+632400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 99015.324, 'num_steps_sampled': 632400, 'update_time_ms': 2.479, 'num_steps_trained': 632400, 'load_time_ms': 0.616, 'default': {'kl': 0.01316943857818842, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.11813735961914, 'total_loss': 15.774693489074707, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11189457774162292, 'vf_explained_var': 0.988402783870697, 'vf_loss': 15.87325382232666}, 'grad_time_ms': 772.287}",3934253,53999.16732788086,-151.79679673759537,cda-server-6,24,-166.93235202604248,{},12648,10.157.146.6,{},-142.85611414435792,0,1200,2025-08-30_05-36-38,527,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756524998,50.0,632400,53999.16732788086,115.84517884254456,527
+633600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 97399.361, 'num_steps_sampled': 633600, 'update_time_ms': 2.551, 'num_steps_trained': 633600, 'load_time_ms': 0.618, 'default': {'kl': 0.013452763669192791, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.95552921295166, 'total_loss': 11.074514389038086, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12565070390701294, 'vf_explained_var': 0.9907246828079224, 'vf_loss': 11.186546325683594}, 'grad_time_ms': 779.538}",3934253,54095.73775577545,-151.85492234567778,cda-server-6,24,-166.93235202604248,{},12672,10.157.146.6,{},-142.85611414435792,0,1200,2025-08-30_05-38-14,528,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756525094,50.0,633600,54095.73775577545,96.57042789459229,528
+634800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 98868.009, 'num_steps_sampled': 634800, 'update_time_ms': 2.695, 'num_steps_trained': 634800, 'load_time_ms': 0.614, 'default': {'kl': 0.012571911327540874, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.765262126922607, 'total_loss': 20.411996841430664, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11248551309108734, 'vf_explained_var': 0.984770655632019, 'vf_loss': 20.511754989624023}, 'grad_time_ms': 771.55}",3934253,54196.54490971565,-151.80911372745547,cda-server-6,24,-169.4209576894632,{},12696,10.157.146.6,{},-139.84469927279616,0,1200,2025-08-30_05-39-55,529,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756525195,50.0,634800,54196.54490971565,100.8071539402008,529
+636000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 97273.899, 'num_steps_sampled': 636000, 'update_time_ms': 2.68, 'num_steps_trained': 636000, 'load_time_ms': 0.612, 'default': {'kl': 0.013294359669089317, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.232682228088379, 'total_loss': 13.754680633544922, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12237784266471863, 'vf_explained_var': 0.9897999167442322, 'vf_loss': 13.86359977722168}, 'grad_time_ms': 774.521}",3934253,54289.31747460365,-151.66786411049014,cda-server-6,24,-169.4209576894632,{},12720,10.157.146.6,{},-139.84469927279616,0,1200,2025-08-30_05-41-28,530,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756525288,50.0,636000,54289.31747460365,92.77256488800049,530
+637200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 96229.918, 'num_steps_sampled': 637200, 'update_time_ms': 2.633, 'num_steps_trained': 637200, 'load_time_ms': 0.613, 'default': {'kl': 0.01316928118467331, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.252899169921875, 'total_loss': 14.948100090026855, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12974220514297485, 'vf_explained_var': 0.9902970790863037, 'vf_loss': 15.064509391784668}, 'grad_time_ms': 772.885}",3934253,54362.44271707535,-151.8159965155838,cda-server-6,24,-169.4209576894632,{},12744,10.157.146.6,{},-139.84469927279616,0,1200,2025-08-30_05-42-41,531,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756525361,50.0,637200,54362.44271707535,73.12524247169495,531
+638400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 96145.331, 'num_steps_sampled': 638400, 'update_time_ms': 2.666, 'num_steps_trained': 638400, 'load_time_ms': 0.62, 'default': {'kl': 0.012331483885645866, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.109546661376953, 'total_loss': 11.662945747375488, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12200065702199936, 'vf_explained_var': 0.9907290935516357, 'vf_loss': 11.772459983825684}, 'grad_time_ms': 773.306}",3934253,54474.878903627396,-151.90677020646783,cda-server-6,24,-169.4209576894632,{},12768,10.157.146.6,{},-139.84469927279616,0,1200,2025-08-30_05-44-33,532,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756525473,50.0,638400,54474.878903627396,112.43618655204773,532
+639600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95973.23, 'num_steps_sampled': 639600, 'update_time_ms': 2.589, 'num_steps_trained': 639600, 'load_time_ms': 0.618, 'default': {'kl': 0.011274803429841995, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.791914939880371, 'total_loss': 13.35634708404541, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10990992188453674, 'vf_explained_var': 0.9885459542274475, 'vf_loss': 13.454841613769531}, 'grad_time_ms': 769.58}",3934253,54556.572207927704,-152.0021072586479,cda-server-6,24,-163.8275029739961,{},12792,10.157.146.6,{},-141.7869101792552,0,1200,2025-08-30_05-45-55,533,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756525555,50.0,639600,54556.572207927704,81.69330430030823,533
+640800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95133.223, 'num_steps_sampled': 640800, 'update_time_ms': 2.603, 'num_steps_trained': 640800, 'load_time_ms': 0.618, 'default': {'kl': 0.011796173639595509, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.725955486297607, 'total_loss': 17.79370880126953, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11688640713691711, 'vf_explained_var': 0.9858031868934631, 'vf_loss': 17.898653030395508}, 'grad_time_ms': 775.256}",3934253,54645.28475642204,-151.91061175978035,cda-server-6,24,-163.8275029739961,{},12816,10.157.146.6,{},-141.7869101792552,0,1200,2025-08-30_05-47-24,534,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756525644,50.0,640800,54645.28475642204,88.71254849433899,534
+642000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95814.997, 'num_steps_sampled': 642000, 'update_time_ms': 2.577, 'num_steps_trained': 642000, 'load_time_ms': 0.617, 'default': {'kl': 0.01341434195637703, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.816812515258789, 'total_loss': 14.266355514526367, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12442073225975037, 'vf_explained_var': 0.9879933595657349, 'vf_loss': 14.377195358276367}, 'grad_time_ms': 775.052}",3934253,54754.17452979088,-151.75500545859387,cda-server-6,24,-163.8275029739961,{},12840,10.157.146.6,{},-142.79085044915752,0,1200,2025-08-30_05-49-13,535,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756525753,50.0,642000,54754.17452979088,108.88977336883545,535
+643200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 96548.363, 'num_steps_sampled': 643200, 'update_time_ms': 2.567, 'num_steps_trained': 643200, 'load_time_ms': 0.616, 'default': {'kl': 0.01187937706708908, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.564441204071045, 'total_loss': 13.592453002929688, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.09899282455444336, 'vf_explained_var': 0.9898480176925659, 'vf_loss': 13.679417610168457}, 'grad_time_ms': 776.95}",3934253,54856.658707141876,-151.69387446085312,cda-server-6,24,-163.9252472156271,{},12864,10.157.146.6,{},-149.25603792487527,0,1200,2025-08-30_05-50-55,536,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756525855,50.0,643200,54856.658707141876,102.48417735099792,536
+644400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93374.482, 'num_steps_sampled': 644400, 'update_time_ms': 2.65, 'num_steps_trained': 644400, 'load_time_ms': 0.617, 'default': {'kl': 0.013803391717374325, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.7664923667907715, 'total_loss': 10.183890342712402, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12922273576259613, 'vf_explained_var': 0.9913797378540039, 'vf_loss': 10.299137115478516}, 'grad_time_ms': 774.203}",3934253,54940.73849415779,-151.52124492364973,cda-server-6,24,-163.9252472156271,{},12888,10.157.146.6,{},-148.7753803736122,0,1200,2025-08-30_05-52-19,537,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756525939,50.0,644400,54940.73849415779,84.07978701591492,537
+645600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93288.352, 'num_steps_sampled': 645600, 'update_time_ms': 2.676, 'num_steps_trained': 645600, 'load_time_ms': 0.63, 'default': {'kl': 0.01337174791842699, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.903218746185303, 'total_loss': 7.999932289123535, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13875660300254822, 'vf_explained_var': 0.993107259273529, 'vf_loss': 8.125149726867676}, 'grad_time_ms': 775.502}",3934253,55036.46237754822,-151.58003477042269,cda-server-6,24,-163.9252472156271,{},12912,10.157.146.6,{},-147.62447533124597,0,1200,2025-08-30_05-53-55,538,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756526035,50.0,645600,55036.46237754822,95.72388339042664,538
+646800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94409.314, 'num_steps_sampled': 646800, 'update_time_ms': 2.543, 'num_steps_trained': 646800, 'load_time_ms': 0.631, 'default': {'kl': 0.011509610339999199, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.807576656341553, 'total_loss': 12.251175880432129, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1061524972319603, 'vf_explained_var': 0.9901471734046936, 'vf_loss': 12.345675468444824}, 'grad_time_ms': 768.137}",3934253,55148.40368771553,-151.5295869223695,cda-server-6,24,-163.9252472156271,{},12936,10.157.146.6,{},-141.74760840253305,0,1200,2025-08-30_05-55-47,539,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756526147,50.0,646800,55148.40368771553,111.94131016731262,539
+648000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94642.933, 'num_steps_sampled': 648000, 'update_time_ms': 2.522, 'num_steps_trained': 648000, 'load_time_ms': 0.636, 'default': {'kl': 0.01324335765093565, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.777318954467773, 'total_loss': 14.313945770263672, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10932556539773941, 'vf_explained_var': 0.9886135458946228, 'vf_loss': 14.40986442565918}, 'grad_time_ms': 775.809}",3934253,55243.58929491043,-151.74086113916158,cda-server-6,24,-166.98272412453377,{},12960,10.157.146.6,{},-141.74760840253305,0,1200,2025-08-30_05-57-22,540,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756526242,50.0,648000,55243.58929491043,95.18560719490051,540
+649200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95782.371, 'num_steps_sampled': 649200, 'update_time_ms': 2.528, 'num_steps_trained': 649200, 'load_time_ms': 0.639, 'default': {'kl': 0.011101160198450089, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.8202033042907715, 'total_loss': 24.006174087524414, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1093890443444252, 'vf_explained_var': 0.9816538691520691, 'vf_loss': 24.104326248168945}, 'grad_time_ms': 785.284}",3934253,55328.203587055206,-151.7548923149819,cda-server-6,24,-166.98272412453377,{},12984,10.157.146.6,{},-139.9202484174889,0,1200,2025-08-30_05-58-47,541,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756526327,50.0,649200,55328.203587055206,84.61429214477539,541
+650400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93046.918, 'num_steps_sampled': 650400, 'update_time_ms': 2.545, 'num_steps_trained': 650400, 'load_time_ms': 0.628, 'default': {'kl': 0.013351892121136189, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.874947547912598, 'total_loss': 13.548004150390625, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12411337345838547, 'vf_explained_var': 0.9898288249969482, 'vf_loss': 13.658597946166992}, 'grad_time_ms': 789.284}",3934253,55413.32578778267,-151.8367207524007,cda-server-6,24,-166.98272412453377,{},13008,10.157.146.6,{},-139.9202484174889,0,1200,2025-08-30_06-00-12,542,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756526412,50.0,650400,55413.32578778267,85.12220072746277,542
+651600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 97231.22, 'num_steps_sampled': 651600, 'update_time_ms': 2.556, 'num_steps_trained': 651600, 'load_time_ms': 0.622, 'default': {'kl': 0.014025096781551838, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.527444839477539, 'total_loss': 16.716171264648438, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12651662528514862, 'vf_explained_var': 0.9879534840583801, 'vf_loss': 16.828487396240234}, 'grad_time_ms': 774.998}",3934253,55536.71937775612,-152.20230023532304,cda-server-6,24,-167.6957739032894,{},13032,10.157.146.6,{},-139.9202484174889,0,1200,2025-08-30_06-02-16,543,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756526536,50.0,651600,55536.71937775612,123.3935899734497,543
+652800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 96703.712, 'num_steps_sampled': 652800, 'update_time_ms': 2.543, 'num_steps_trained': 652800, 'load_time_ms': 0.633, 'default': {'kl': 0.010980258695781231, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.6602020263671875, 'total_loss': 22.34575653076172, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10523133724927902, 'vf_explained_var': 0.9839035868644714, 'vf_loss': 22.439870834350586}, 'grad_time_ms': 767.295}",3934253,55620.079362392426,-152.33134642985738,cda-server-6,24,-167.6957739032894,{},13056,10.157.146.6,{},-139.9202484174889,0,1200,2025-08-30_06-03-39,544,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756526619,50.0,652800,55620.079362392426,83.35998463630676,544
+654000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 96466.977, 'num_steps_sampled': 654000, 'update_time_ms': 2.52, 'num_steps_trained': 654000, 'load_time_ms': 0.641, 'default': {'kl': 0.011783335357904434, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.79296875, 'total_loss': 40.88715362548828, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10244444757699966, 'vf_explained_var': 0.9700209498405457, 'vf_loss': 40.9776611328125}, 'grad_time_ms': 765.164}",3934253,55726.58039832115,-152.20774698978352,cda-server-6,24,-172.32353041127666,{},13080,10.157.146.6,{},-149.34913540216635,0,1200,2025-08-30_06-05-25,545,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756526725,50.0,654000,55726.58039832115,106.5010359287262,545
+655200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94113.276, 'num_steps_sampled': 655200, 'update_time_ms': 2.559, 'num_steps_trained': 655200, 'load_time_ms': 0.652, 'default': {'kl': 0.012306980788707733, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.727290630340576, 'total_loss': 10.178478240966797, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12441620230674744, 'vf_explained_var': 0.9918663501739502, 'vf_loss': 10.290432929992676}, 'grad_time_ms': 773.947}",3934253,55805.616351127625,-152.20533783139092,cda-server-6,24,-172.32353041127666,{},13104,10.157.146.6,{},-143.8546421528748,0,1200,2025-08-30_06-06-44,546,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756526804,50.0,655200,55805.616351127625,79.03595280647278,546
+656400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93427.926, 'num_steps_sampled': 656400, 'update_time_ms': 2.466, 'num_steps_trained': 656400, 'load_time_ms': 0.643, 'default': {'kl': 0.011928428895771503, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.634652137756348, 'total_loss': 16.324533462524414, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10499259829521179, 'vf_explained_var': 0.9873420000076294, 'vf_loss': 16.417448043823242}, 'grad_time_ms': 773.586}",3934253,55882.83739686012,-151.81330852174514,cda-server-6,24,-172.32353041127666,{},13128,10.157.146.6,{},-143.8546421528748,0,1200,2025-08-30_06-08-02,547,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756526882,50.0,656400,55882.83739686012,77.22104573249817,547
+657600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94058.335, 'num_steps_sampled': 657600, 'update_time_ms': 2.362, 'num_steps_trained': 657600, 'load_time_ms': 0.634, 'default': {'kl': 0.011866304092109203, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.796850204467773, 'total_loss': 22.88044548034668, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11830038577318192, 'vf_explained_var': 0.9822542667388916, 'vf_loss': 22.986730575561523}, 'grad_time_ms': 766.724}",3934253,55984.79539489746,-151.95285161137662,cda-server-6,24,-172.32353041127666,{},13152,10.157.146.6,{},-143.8546421528748,0,1200,2025-08-30_06-09-44,548,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756526984,50.0,657600,55984.79539489746,101.95799803733826,548
+658800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94776.906, 'num_steps_sampled': 658800, 'update_time_ms': 2.377, 'num_steps_trained': 658800, 'load_time_ms': 0.635, 'default': {'kl': 0.013383209705352783, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.047952651977539, 'total_loss': 16.600008010864258, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1312945932149887, 'vf_explained_var': 0.987158477306366, 'vf_loss': 16.71775245666504}, 'grad_time_ms': 775.892}",3934253,56104.01416516304,-151.82635310445673,cda-server-6,24,-166.28430668358237,{},13176,10.157.146.6,{},-139.8730050272048,0,1200,2025-08-30_06-11-43,549,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756527103,50.0,658800,56104.01416516304,119.21877026557922,549
+660000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95396.522, 'num_steps_sampled': 660000, 'update_time_ms': 2.399, 'num_steps_trained': 660000, 'load_time_ms': 0.631, 'default': {'kl': 0.012394605204463005, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.464038372039795, 'total_loss': 13.411670684814453, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11062879860401154, 'vf_explained_var': 0.9897158741950989, 'vf_loss': 13.509750366210938}, 'grad_time_ms': 766.048}",3934253,56205.296759843826,-151.7888369248003,cda-server-6,24,-166.28430668358237,{},13200,10.157.146.6,{},-139.8730050272048,0,1200,2025-08-30_06-13-24,550,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756527204,50.0,660000,56205.296759843826,101.28259468078613,550
+661200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95110.739, 'num_steps_sampled': 661200, 'update_time_ms': 2.409, 'num_steps_trained': 661200, 'load_time_ms': 0.633, 'default': {'kl': 0.013078085146844387, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.584109783172607, 'total_loss': 15.645307540893555, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11028580367565155, 'vf_explained_var': 0.9869313836097717, 'vf_loss': 15.742351531982422}, 'grad_time_ms': 755.638}",3934253,56286.949072122574,-151.88660863617798,cda-server-6,24,-166.28430668358237,{},13224,10.157.146.6,{},-139.8730050272048,0,1200,2025-08-30_06-14-46,551,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756527286,50.0,661200,56286.949072122574,81.65231227874756,551
+662400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 97874.593, 'num_steps_sampled': 662400, 'update_time_ms': 2.444, 'num_steps_trained': 662400, 'load_time_ms': 0.639, 'default': {'kl': 0.013991860672831535, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.786781311035156, 'total_loss': 15.687580108642578, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12217790633440018, 'vf_explained_var': 0.9863888621330261, 'vf_loss': 15.795589447021484}, 'grad_time_ms': 749.511}",3934253,56399.64902329445,-151.60787893259965,cda-server-6,24,-164.2084314069166,{},13248,10.157.146.6,{},-139.8730050272048,0,1200,2025-08-30_06-16-39,552,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756527399,50.0,662400,56399.64902329445,112.699951171875,552
+663600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94479.078, 'num_steps_sampled': 663600, 'update_time_ms': 2.41, 'num_steps_trained': 663600, 'load_time_ms': 0.65, 'default': {'kl': 0.011322933249175549, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.6803412437438965, 'total_loss': 15.122419357299805, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11909367144107819, 'vf_explained_var': 0.9879716038703918, 'vf_loss': 15.230048179626465}, 'grad_time_ms': 762.713}",3934253,56489.219517707825,-151.44552527452606,cda-server-6,24,-164.2084314069166,{},13272,10.157.146.6,{},-139.8730050272048,0,1200,2025-08-30_06-18-08,553,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756527488,50.0,663600,56489.219517707825,89.57049441337585,553
+664800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 96160.343, 'num_steps_sampled': 664800, 'update_time_ms': 2.455, 'num_steps_trained': 664800, 'load_time_ms': 0.631, 'default': {'kl': 0.0125638572499156, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.583347797393799, 'total_loss': 11.01961612701416, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12256471067667007, 'vf_explained_var': 0.9910979866981506, 'vf_loss': 11.129459381103516}, 'grad_time_ms': 771.023}",3934253,56589.475972890854,-151.67157253702425,cda-server-6,24,-164.2084314069166,{},13296,10.157.146.6,{},-142.75703188287594,0,1200,2025-08-30_06-19-48,554,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756527588,50.0,664800,56589.475972890854,100.25645518302917,554
+666000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 96789.175, 'num_steps_sampled': 666000, 'update_time_ms': 2.525, 'num_steps_trained': 666000, 'load_time_ms': 0.631, 'default': {'kl': 0.01608692668378353, 'cur_lr': 4.999999873689376e-05, 'entropy': 8.026680946350098, 'total_loss': 14.854989051818848, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13966526091098785, 'vf_explained_var': 0.9897687435150146, 'vf_loss': 14.978367805480957}, 'grad_time_ms': 774.245}",3934253,56702.29811143875,-151.56715217271983,cda-server-6,24,-168.90639455884744,{},13320,10.157.146.6,{},-143.36764966395847,0,1200,2025-08-30_06-21-41,555,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756527701,50.0,666000,56702.29811143875,112.82213854789734,555
+667200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 98435.435, 'num_steps_sampled': 667200, 'update_time_ms': 2.527, 'num_steps_trained': 667200, 'load_time_ms': 0.62, 'default': {'kl': 0.013392424210906029, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.8170342445373535, 'total_loss': 10.752395629882812, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11502734571695328, 'vf_explained_var': 0.9913275837898254, 'vf_loss': 10.853862762451172}, 'grad_time_ms': 770.789}",3934253,56797.761281490326,-151.40665633516022,cda-server-6,24,-168.90639455884744,{},13344,10.157.146.6,{},-139.96352003292222,0,1200,2025-08-30_06-23-17,556,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756527797,50.0,667200,56797.761281490326,95.4631700515747,556
+668400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 101548.632, 'num_steps_sampled': 668400, 'update_time_ms': 2.522, 'num_steps_trained': 668400, 'load_time_ms': 0.63, 'default': {'kl': 0.013542591594159603, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.707234859466553, 'total_loss': 9.735451698303223, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1209249496459961, 'vf_explained_var': 0.9922139644622803, 'vf_loss': 9.84266471862793}, 'grad_time_ms': 777.582}",3934253,56906.1828122139,-151.23620579021028,cda-server-6,24,-168.90639455884744,{},13368,10.157.146.6,{},-139.96352003292222,0,1200,2025-08-30_06-25-05,557,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756527905,50.0,668400,56906.1828122139,108.42153072357178,557
+669600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 102200.348, 'num_steps_sampled': 669600, 'update_time_ms': 2.554, 'num_steps_trained': 669600, 'load_time_ms': 0.628, 'default': {'kl': 0.01427131425589323, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.790090560913086, 'total_loss': 8.442832946777344, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1334570199251175, 'vf_explained_var': 0.9929354190826416, 'vf_loss': 8.561840057373047}, 'grad_time_ms': 762.454}",3934253,57014.50557184219,-151.2608926220695,cda-server-6,24,-168.90639455884744,{},13392,10.157.146.6,{},-139.96352003292222,0,1200,2025-08-30_06-26-54,558,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756528014,50.0,669600,57014.50557184219,108.3227596282959,558
+670800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 101061.44, 'num_steps_sampled': 670800, 'update_time_ms': 2.523, 'num_steps_trained': 670800, 'load_time_ms': 0.622, 'default': {'kl': 0.012106995098292828, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.880985736846924, 'total_loss': 13.949009895324707, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11816269904375076, 'vf_explained_var': 0.9890486598014832, 'vf_loss': 14.054914474487305}, 'grad_time_ms': 753.675}",3934253,57122.24686527252,-151.31124070736968,cda-server-6,24,-164.1500952171991,{},13416,10.157.146.6,{},-139.96352003292222,0,1200,2025-08-30_06-28-41,559,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756528121,50.0,670800,57122.24686527252,107.74129343032837,559
+672000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 99077.936, 'num_steps_sampled': 672000, 'update_time_ms': 2.475, 'num_steps_trained': 672000, 'load_time_ms': 0.625, 'default': {'kl': 0.012361031025648117, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.896833419799805, 'total_loss': 17.845319747924805, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1219933032989502, 'vf_explained_var': 0.9871928691864014, 'vf_loss': 17.954797744750977}, 'grad_time_ms': 751.704}",3934253,57203.67510128021,-151.56446937536896,cda-server-6,24,-164.47141499845398,{},13440,10.157.146.6,{},-143.1663559505958,0,1200,2025-08-30_06-30-03,560,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756528203,50.0,672000,57203.67510128021,81.42823600769043,560
+673200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 101133.18, 'num_steps_sampled': 673200, 'update_time_ms': 2.442, 'num_steps_trained': 673200, 'load_time_ms': 0.618, 'default': {'kl': 0.012184562161564827, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.8777337074279785, 'total_loss': 17.04519271850586, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12136489897966385, 'vf_explained_var': 0.9857383370399475, 'vf_loss': 17.154220581054688}, 'grad_time_ms': 763.265}",3934253,57305.99560403824,-151.73232247828938,cda-server-6,24,-164.47141499845398,{},13464,10.157.146.6,{},-139.79079619262694,0,1200,2025-08-30_06-31-45,561,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756528305,50.0,673200,57305.99560403824,102.32050275802612,561
+674400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 100718.488, 'num_steps_sampled': 674400, 'update_time_ms': 2.403, 'num_steps_trained': 674400, 'load_time_ms': 0.617, 'default': {'kl': 0.012234192341566086, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.9072394371032715, 'total_loss': 12.1405668258667, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12168225646018982, 'vf_explained_var': 0.9896350502967834, 'vf_loss': 12.249862670898438}, 'grad_time_ms': 763.304}",3934253,57414.548646211624,-151.86656038831188,cda-server-6,24,-164.47141499845398,{},13488,10.157.146.6,{},-139.79079619262694,0,1200,2025-08-30_06-33-34,562,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756528414,50.0,674400,57414.548646211624,108.55304217338562,562
+675600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 100828.693, 'num_steps_sampled': 675600, 'update_time_ms': 2.446, 'num_steps_trained': 675600, 'load_time_ms': 0.612, 'default': {'kl': 0.011146489530801773, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.461226463317871, 'total_loss': 10.990604400634766, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12205490469932556, 'vf_explained_var': 0.9911925792694092, 'vf_loss': 11.101373672485352}, 'grad_time_ms': 756.709}",3934253,57505.155586481094,-151.8312616787746,cda-server-6,24,-165.17639154659727,{},13512,10.157.146.6,{},-135.40392465635645,0,1200,2025-08-30_06-35-04,563,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756528504,50.0,675600,57505.155586481094,90.60694026947021,563
+676800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 99875.662, 'num_steps_sampled': 676800, 'update_time_ms': 2.389, 'num_steps_trained': 676800, 'load_time_ms': 0.615, 'default': {'kl': 0.013896778225898743, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.457207679748535, 'total_loss': 11.320537567138672, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13056457042694092, 'vf_explained_var': 0.9907848834991455, 'vf_loss': 11.437031745910645}, 'grad_time_ms': 762.526}",3934253,57595.93927574158,-152.12418592379265,cda-server-6,24,-168.22577448549237,{},13536,10.157.146.6,{},-135.40392465635645,0,1200,2025-08-30_06-36-35,564,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756528595,50.0,676800,57595.93927574158,90.78368926048279,564
+678000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 100426.129, 'num_steps_sampled': 678000, 'update_time_ms': 2.36, 'num_steps_trained': 678000, 'load_time_ms': 0.613, 'default': {'kl': 0.011276878416538239, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.278744220733643, 'total_loss': 16.736454010009766, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11820343136787415, 'vf_explained_var': 0.9870368242263794, 'vf_loss': 16.843238830566406}, 'grad_time_ms': 748.383}",3934253,57714.12493252754,-152.0202009410142,cda-server-6,24,-168.22577448549237,{},13560,10.157.146.6,{},-135.40392465635645,0,1200,2025-08-30_06-38-33,565,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756528713,50.0,678000,57714.12493252754,118.18565678596497,565
+679200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 101492.584, 'num_steps_sampled': 679200, 'update_time_ms': 2.34, 'num_steps_trained': 679200, 'load_time_ms': 0.615, 'default': {'kl': 0.014653812162578106, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.344961166381836, 'total_loss': 20.03702735900879, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11613241583108902, 'vf_explained_var': 0.9865771532058716, 'vf_loss': 20.138322830200195}, 'grad_time_ms': 735.91}",3934253,57820.13002371788,-151.91582745968978,cda-server-6,24,-168.22577448549237,{},13584,10.157.146.6,{},-135.40392465635645,0,1200,2025-08-30_06-40-19,566,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756528819,50.0,679200,57820.13002371788,106.00509119033813,566
+680400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 98846.678, 'num_steps_sampled': 680400, 'update_time_ms': 2.359, 'num_steps_trained': 680400, 'load_time_ms': 0.615, 'default': {'kl': 0.011863755993545055, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.693569660186768, 'total_loss': 20.87421226501465, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11812932044267654, 'vf_explained_var': 0.9834575653076172, 'vf_loss': 20.98032569885254}, 'grad_time_ms': 732.276}",3934253,57902.05630970001,-151.84447129846183,cda-server-6,24,-168.22577448549237,{},13608,10.157.146.6,{},-141.66634416044175,0,1200,2025-08-30_06-41-41,567,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756528901,50.0,680400,57902.05630970001,81.92628598213196,567
+681600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 97790.357, 'num_steps_sampled': 681600, 'update_time_ms': 2.397, 'num_steps_trained': 681600, 'load_time_ms': 0.616, 'default': {'kl': 0.011216883547604084, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.75992488861084, 'total_loss': 16.491910934448242, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12139460444450378, 'vf_explained_var': 0.98785400390625, 'vf_loss': 16.601947784423828}, 'grad_time_ms': 753.3}",3934253,58000.02692985535,-151.5986989681715,cda-server-6,24,-166.57318712299187,{},13632,10.157.146.6,{},-139.99448377052,0,1200,2025-08-30_06-43-19,568,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756528999,50.0,681600,58000.02692985535,97.97062015533447,568
+682800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 98730.33, 'num_steps_sampled': 682800, 'update_time_ms': 2.389, 'num_steps_trained': 682800, 'load_time_ms': 0.62, 'default': {'kl': 0.011672453954815865, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.440184593200684, 'total_loss': 13.130718231201172, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10527868568897247, 'vf_explained_var': 0.9891159534454346, 'vf_loss': 13.224178314208984}, 'grad_time_ms': 760.107}",3934253,58117.23666000366,-151.80092093432214,cda-server-6,24,-166.57318712299187,{},13656,10.157.146.6,{},-139.99448377052,0,1200,2025-08-30_06-45-16,569,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756529116,50.0,682800,58117.23666000366,117.20973014831543,569
+684000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 97885.303, 'num_steps_sampled': 684000, 'update_time_ms': 2.389, 'num_steps_trained': 684000, 'load_time_ms': 0.614, 'default': {'kl': 0.014902864582836628, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.657007694244385, 'total_loss': 12.331796646118164, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1290094405412674, 'vf_explained_var': 0.989945650100708, 'vf_loss': 12.445716857910156}, 'grad_time_ms': 756.273}",3934253,58190.176151037216,-152.00770656228394,cda-server-6,24,-166.57318712299187,{},13680,10.157.146.6,{},-139.99448377052,0,1200,2025-08-30_06-46-29,570,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756529189,50.0,684000,58190.176151037216,72.93949103355408,570
+685200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94176.655, 'num_steps_sampled': 685200, 'update_time_ms': 2.514, 'num_steps_trained': 685200, 'load_time_ms': 0.617, 'default': {'kl': 0.013572430238127708, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.551823139190674, 'total_loss': 8.981759071350098, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13007181882858276, 'vf_explained_var': 0.9922204613685608, 'vf_loss': 9.098089218139648}, 'grad_time_ms': 753.349}",3934253,58255.38171863556,-151.82728255358478,cda-server-6,24,-166.57318712299187,{},13704,10.157.146.6,{},-139.99448377052,0,1200,2025-08-30_06-47-35,571,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756529255,50.0,685200,58255.38171863556,65.2055675983429,571
+686400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92049.621, 'num_steps_sampled': 686400, 'update_time_ms': 2.474, 'num_steps_trained': 686400, 'load_time_ms': 0.621, 'default': {'kl': 0.013356123119592667, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.2052764892578125, 'total_loss': 12.253599166870117, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10353487730026245, 'vf_explained_var': 0.9895342588424683, 'vf_loss': 12.343612670898438}, 'grad_time_ms': 753.076}",3934253,58342.66126012802,-151.84706925774134,cda-server-6,24,-165.6289682061747,{},13728,10.157.146.6,{},-146.64909800243484,0,1200,2025-08-30_06-49-02,572,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756529342,50.0,686400,58342.66126012802,87.27954149246216,572
+687600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92856.237, 'num_steps_sampled': 687600, 'update_time_ms': 2.458, 'num_steps_trained': 687600, 'load_time_ms': 0.614, 'default': {'kl': 0.012467894703149796, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.665492534637451, 'total_loss': 12.708492279052734, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11531029641628265, 'vf_explained_var': 0.9901459217071533, 'vf_loss': 12.811178207397461}, 'grad_time_ms': 761.337}",3934253,58441.416241407394,-151.3989287948314,cda-server-6,24,-160.94876140781466,{},13752,10.157.146.6,{},-139.5995533319289,0,1200,2025-08-30_06-50-41,573,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756529441,50.0,687600,58441.416241407394,98.75498127937317,573
+688800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92962.609, 'num_steps_sampled': 688800, 'update_time_ms': 2.469, 'num_steps_trained': 688800, 'load_time_ms': 0.611, 'default': {'kl': 0.012100116349756718, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.34058141708374, 'total_loss': 18.82788848876953, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11172984540462494, 'vf_explained_var': 0.9857650399208069, 'vf_loss': 18.927371978759766}, 'grad_time_ms': 728.025}",3934253,58532.93010187149,-151.50244165879874,cda-server-6,24,-161.4437523974731,{},13776,10.157.146.6,{},-139.5995533319289,0,1200,2025-08-30_06-52-12,574,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756529532,50.0,688800,58532.93010187149,91.51386046409607,574
+690000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 90303.556, 'num_steps_sampled': 690000, 'update_time_ms': 2.483, 'num_steps_trained': 690000, 'load_time_ms': 0.61, 'default': {'kl': 0.012349085882306099, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.143519878387451, 'total_loss': 17.44886016845703, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11287827044725418, 'vf_explained_var': 0.9857209324836731, 'vf_loss': 17.549238204956055}, 'grad_time_ms': 718.251}",3934253,58624.427540779114,-151.5061001221446,cda-server-6,24,-161.4437523974731,{},13800,10.157.146.6,{},-139.5995533319289,0,1200,2025-08-30_06-53-44,575,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756529624,50.0,690000,58624.427540779114,91.49743890762329,575
+691200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 91299.366, 'num_steps_sampled': 691200, 'update_time_ms': 2.473, 'num_steps_trained': 691200, 'load_time_ms': 0.617, 'default': {'kl': 0.011632119305431843, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.450540542602539, 'total_loss': 18.479217529296875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.113397017121315, 'vf_explained_var': 0.9858831763267517, 'vf_loss': 18.58083724975586}, 'grad_time_ms': 718.423}",3934253,58740.391570568085,-151.6670901938319,cda-server-6,24,-164.29363151307973,{},13824,10.157.146.6,{},-139.5995533319289,0,1200,2025-08-30_06-55-40,576,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756529740,50.0,691200,58740.391570568085,115.96402978897095,576
+692400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 90758.536, 'num_steps_sampled': 692400, 'update_time_ms': 2.517, 'num_steps_trained': 692400, 'load_time_ms': 0.618, 'default': {'kl': 0.014467747882008553, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.817798137664795, 'total_loss': 15.944793701171875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13392749428749084, 'vf_explained_var': 0.9877843856811523, 'vf_loss': 16.06407356262207}, 'grad_time_ms': 723.72}",3934253,58816.963297605515,-151.96521562869458,cda-server-6,24,-167.04381562923297,{},13848,10.157.146.6,{},-142.44307414123705,0,1200,2025-08-30_06-56-56,577,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756529816,50.0,692400,58816.963297605515,76.57172703742981,577
+693600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93282.483, 'num_steps_sampled': 693600, 'update_time_ms': 2.52, 'num_steps_trained': 693600, 'load_time_ms': 0.627, 'default': {'kl': 0.01336402352899313, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.232810974121094, 'total_loss': 22.106884002685547, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11316641420125961, 'vf_explained_var': 0.9828341007232666, 'vf_loss': 22.206520080566406}, 'grad_time_ms': 715.479}",3934253,58940.09111189842,-151.83281265991272,cda-server-6,24,-167.04381562923297,{},13872,10.157.146.6,{},-140.01545140863857,0,1200,2025-08-30_06-58-59,578,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756529939,50.0,693600,58940.09111189842,123.12781429290771,578
+694800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 90937.401, 'num_steps_sampled': 694800, 'update_time_ms': 2.566, 'num_steps_trained': 694800, 'load_time_ms': 0.63, 'default': {'kl': 0.012695417739450932, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.237936973571777, 'total_loss': 11.189031600952148, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10490371286869049, 'vf_explained_var': 0.9903163909912109, 'vf_loss': 11.281082153320312}, 'grad_time_ms': 726.687}",3934253,59033.962436914444,-151.86428952841857,cda-server-6,24,-167.04381562923297,{},13896,10.157.146.6,{},-140.01545140863857,0,1200,2025-08-30_07-00-33,579,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756530033,50.0,694800,59033.962436914444,93.87132501602173,579
+696000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92486.912, 'num_steps_sampled': 696000, 'update_time_ms': 2.619, 'num_steps_trained': 696000, 'load_time_ms': 0.632, 'default': {'kl': 0.010564768686890602, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.240657329559326, 'total_loss': 28.40843391418457, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10259688645601273, 'vf_explained_var': 0.9848769307136536, 'vf_loss': 28.50033187866211}, 'grad_time_ms': 727.581}",3934253,59122.40687298775,-152.01809617008124,cda-server-6,24,-167.04381562923297,{},13920,10.157.146.6,{},-140.01545140863857,0,1200,2025-08-30_07-02-02,580,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756530122,50.0,696000,59122.40687298775,88.44443607330322,580
+697200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94174.302, 'num_steps_sampled': 697200, 'update_time_ms': 2.676, 'num_steps_trained': 697200, 'load_time_ms': 0.627, 'default': {'kl': 0.012403911910951138, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.399474143981934, 'total_loss': 14.142861366271973, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10619829595088959, 'vf_explained_var': 0.9894328713417053, 'vf_loss': 14.23650074005127}, 'grad_time_ms': 729.301}",3934253,59204.50434041023,-152.04174332906396,cda-server-6,24,-165.16394158770373,{},13944,10.157.146.6,{},-140.01545140863857,0,1200,2025-08-30_07-03-24,581,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756530204,50.0,697200,59204.50434041023,82.09746742248535,581
+698400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93912.092, 'num_steps_sampled': 698400, 'update_time_ms': 2.694, 'num_steps_trained': 698400, 'load_time_ms': 0.63, 'default': {'kl': 0.012006421573460102, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.297507286071777, 'total_loss': 15.31088924407959, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.09045369178056717, 'vf_explained_var': 0.9878251552581787, 'vf_loss': 15.389185905456543}, 'grad_time_ms': 717.738}",3934253,59289.04621386528,-152.30105653435592,cda-server-6,24,-165.16394158770373,{},13968,10.157.146.6,{},-149.0772481269036,0,1200,2025-08-30_07-04-49,582,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756530289,50.0,698400,59289.04621386528,84.54187345504761,582
+699600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 91703.455, 'num_steps_sampled': 699600, 'update_time_ms': 2.692, 'num_steps_trained': 699600, 'load_time_ms': 0.633, 'default': {'kl': 0.013890719972550869, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.63686466217041, 'total_loss': 16.806406021118164, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13375477492809296, 'vf_explained_var': 0.9869916439056396, 'vf_loss': 16.926095962524414}, 'grad_time_ms': 716.911}",3934253,59365.7064769268,-152.14242325846607,cda-server-6,24,-165.10500275666027,{},13992,10.157.146.6,{},-141.06966000406916,0,1200,2025-08-30_07-06-05,583,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756530365,50.0,699600,59365.7064769268,76.66026306152344,583
+700800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92539.701, 'num_steps_sampled': 700800, 'update_time_ms': 2.683, 'num_steps_trained': 700800, 'load_time_ms': 0.632, 'default': {'kl': 0.012830524705350399, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.533829689025879, 'total_loss': 16.497915267944336, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11289564520120621, 'vf_explained_var': 0.9874927997589111, 'vf_loss': 16.59781837463379}, 'grad_time_ms': 720.897}",3934253,59465.62331390381,-151.95782594633437,cda-server-6,24,-165.10500275666027,{},14016,10.157.146.6,{},-141.06966000406916,0,1200,2025-08-30_07-07-45,584,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756530465,50.0,700800,59465.62331390381,99.916836977005,584
+702000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93571.579, 'num_steps_sampled': 702000, 'update_time_ms': 2.642, 'num_steps_trained': 702000, 'load_time_ms': 0.636, 'default': {'kl': 0.01353020966053009, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.338387489318848, 'total_loss': 13.563823699951172, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11937059462070465, 'vf_explained_var': 0.9893013834953308, 'vf_loss': 13.66949462890625}, 'grad_time_ms': 732.656}",3934253,59567.556359767914,-151.9279004109191,cda-server-6,24,-165.94330068728993,{},14040,10.157.146.6,{},-141.05374428274698,0,1200,2025-08-30_07-09-27,585,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756530567,50.0,702000,59567.556359767914,101.93304586410522,585
+703200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92809.295, 'num_steps_sampled': 703200, 'update_time_ms': 2.631, 'num_steps_trained': 703200, 'load_time_ms': 0.632, 'default': {'kl': 0.013093508780002594, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.247652053833008, 'total_loss': 9.93628978729248, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12289997935295105, 'vf_explained_var': 0.9934365749359131, 'vf_loss': 10.04593276977539}, 'grad_time_ms': 725.034}",3934253,59675.8199942112,-151.74111855739798,cda-server-6,24,-165.94330068728993,{},14064,10.157.146.6,{},-141.05374428274698,0,1200,2025-08-30_07-11-15,586,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756530675,50.0,703200,59675.8199942112,108.26363444328308,586
+704400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93204.657, 'num_steps_sampled': 704400, 'update_time_ms': 2.598, 'num_steps_trained': 704400, 'load_time_ms': 0.626, 'default': {'kl': 0.011846650391817093, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.4674248695373535, 'total_loss': 8.97598934173584, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12297610193490982, 'vf_explained_var': 0.992843747138977, 'vf_loss': 9.086971282958984}, 'grad_time_ms': 686.738}",3934253,59755.96127986908,-151.67899424222992,cda-server-6,24,-165.94330068728993,{},14088,10.157.146.6,{},-141.05374428274698,0,1200,2025-08-30_07-12-35,587,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756530755,50.0,704400,59755.96127986908,80.14128565788269,587
+705600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 90622.339, 'num_steps_sampled': 705600, 'update_time_ms': 2.583, 'num_steps_trained': 705600, 'load_time_ms': 0.621, 'default': {'kl': 0.012162242084741592, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.1651153564453125, 'total_loss': 10.699304580688477, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.09133824706077576, 'vf_explained_var': 0.9913658499717712, 'vf_loss': 10.778327941894531}, 'grad_time_ms': 690.449}",3934253,59853.30315685272,-151.6922593391394,cda-server-6,24,-165.94330068728993,{},14112,10.157.146.6,{},-141.05374428274698,0,1200,2025-08-30_07-14-13,588,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756530853,50.0,705600,59853.30315685272,97.34187698364258,588
+706800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92077.509, 'num_steps_sampled': 706800, 'update_time_ms': 2.504, 'num_steps_trained': 706800, 'load_time_ms': 0.65, 'default': {'kl': 0.01409607008099556, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.442818641662598, 'total_loss': 8.390382766723633, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12273070216178894, 'vf_explained_var': 0.994513213634491, 'vf_loss': 8.49884033203125}, 'grad_time_ms': 678.541}",3934253,59961.60695576668,-151.50156964718323,cda-server-6,24,-165.34163108568424,{},14136,10.157.146.6,{},-141.81389860999062,0,1200,2025-08-30_07-16-01,589,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756530961,50.0,706800,59961.60695576668,108.30379891395569,589
+708000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94196.583, 'num_steps_sampled': 708000, 'update_time_ms': 2.523, 'num_steps_trained': 708000, 'load_time_ms': 0.674, 'default': {'kl': 0.012521314434707165, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.323137283325195, 'total_loss': 10.42292308807373, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13767369091510773, 'vf_explained_var': 0.9919801354408264, 'vf_loss': 10.547918319702148}, 'grad_time_ms': 678.715}",3934253,60071.24426102638,-151.5101446258732,cda-server-6,24,-164.03042833185478,{},14160,10.157.146.6,{},-140.0657561986548,0,1200,2025-08-30_07-17-51,590,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756531071,50.0,708000,60071.24426102638,109.63730525970459,590
+709200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 96082.76, 'num_steps_sampled': 709200, 'update_time_ms': 2.382, 'num_steps_trained': 709200, 'load_time_ms': 0.687, 'default': {'kl': 0.011179720051586628, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.874238967895508, 'total_loss': 7.746560096740723, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12053953111171722, 'vf_explained_var': 0.993653416633606, 'vf_loss': 7.85577917098999}, 'grad_time_ms': 673.693}",3934253,60172.15154004097,-151.75990299087707,cda-server-6,24,-167.29179124485003,{},14184,10.157.146.6,{},-139.96449797766664,0,1200,2025-08-30_07-19-32,591,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756531172,50.0,709200,60172.15154004097,100.9072790145874,591
+710400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 98768.234, 'num_steps_sampled': 710400, 'update_time_ms': 2.353, 'num_steps_trained': 710400, 'load_time_ms': 0.697, 'default': {'kl': 0.011559142731130123, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.005263328552246, 'total_loss': 9.97242546081543, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11214432120323181, 'vf_explained_var': 0.9928156733512878, 'vf_loss': 10.07286548614502}, 'grad_time_ms': 679.973}",3934253,60283.61056137085,-151.78678105090998,cda-server-6,24,-167.29179124485003,{},14208,10.157.146.6,{},-139.96449797766664,0,1200,2025-08-30_07-21-23,592,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756531283,50.0,710400,60283.61056137085,111.45902132987976,592
+711600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 99394.054, 'num_steps_sampled': 711600, 'update_time_ms': 2.327, 'num_steps_trained': 711600, 'load_time_ms': 0.699, 'default': {'kl': 0.01326974667608738, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.2245941162109375, 'total_loss': 13.130340576171875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1341947764158249, 'vf_explained_var': 0.9907307028770447, 'vf_loss': 13.251100540161133}, 'grad_time_ms': 673.367}",3934253,60366.462671756744,-151.6194461096379,cda-server-6,24,-167.29179124485003,{},14232,10.157.146.6,{},-139.96449797766664,0,1200,2025-08-30_07-22-46,593,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756531366,50.0,711600,60366.462671756744,82.85211038589478,593
+712800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 99127.994, 'num_steps_sampled': 712800, 'update_time_ms': 2.34, 'num_steps_trained': 712800, 'load_time_ms': 0.702, 'default': {'kl': 0.013715913519263268, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.543368816375732, 'total_loss': 12.581001281738281, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1392856240272522, 'vf_explained_var': 0.9903258085250854, 'vf_loss': 12.706399917602539}, 'grad_time_ms': 693.584}",3934253,60463.921142578125,-151.841262826727,cda-server-6,24,-167.29179124485003,{},14256,10.157.146.6,{},-135.49370618230293,0,1200,2025-08-30_07-24-24,594,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756531464,50.0,712800,60463.921142578125,97.45847082138062,594
+714000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 97260.312, 'num_steps_sampled': 714000, 'update_time_ms': 2.389, 'num_steps_trained': 714000, 'load_time_ms': 0.696, 'default': {'kl': 0.014189370907843113, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.186726093292236, 'total_loss': 13.266934394836426, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11609578132629395, 'vf_explained_var': 0.9903583526611328, 'vf_loss': 13.368663787841797}, 'grad_time_ms': 700.24}",3934253,60547.2446205616,-151.6338203087051,cda-server-6,24,-164.4763763376484,{},14280,10.157.146.6,{},-135.49370618230293,0,1200,2025-08-30_07-25-47,595,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756531547,50.0,714000,60547.2446205616,83.32347798347473,595
+715200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95154.528, 'num_steps_sampled': 715200, 'update_time_ms': 2.427, 'num_steps_trained': 715200, 'load_time_ms': 0.691, 'default': {'kl': 0.011939617805182934, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.9514336585998535, 'total_loss': 15.354241371154785, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1097557544708252, 'vf_explained_var': 0.9887726306915283, 'vf_loss': 15.451909065246582}, 'grad_time_ms': 726.238}",3934253,60634.71063876152,-151.64690577669663,cda-server-6,24,-164.4763763376484,{},14304,10.157.146.6,{},-135.49370618230293,0,1200,2025-08-30_07-27-14,596,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756531634,50.0,715200,60634.71063876152,87.46601819992065,596
+716400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93652.735, 'num_steps_sampled': 716400, 'update_time_ms': 2.603, 'num_steps_trained': 716400, 'load_time_ms': 0.696, 'default': {'kl': 0.010730365291237831, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.129854202270508, 'total_loss': 18.592544555664062, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10800933092832565, 'vf_explained_var': 0.9868574142456055, 'vf_loss': 18.68968963623047}, 'grad_time_ms': 760.154}",3934253,60700.175520420074,-151.62646291803293,cda-server-6,24,-164.4763763376484,{},14328,10.157.146.6,{},-135.49370618230293,0,1200,2025-08-30_07-28-20,597,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756531700,50.0,716400,60700.175520420074,65.46488165855408,597
+717600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92106.567, 'num_steps_sampled': 717600, 'update_time_ms': 2.634, 'num_steps_trained': 717600, 'load_time_ms': 0.695, 'default': {'kl': 0.01186525821685791, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.202608108520508, 'total_loss': 14.549711227416992, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10322961211204529, 'vf_explained_var': 0.9883344173431396, 'vf_loss': 14.640926361083984}, 'grad_time_ms': 767.015}",3934253,60782.12470793724,-151.53312025163663,cda-server-6,24,-164.025584113289,{},14352,10.157.146.6,{},-140.24453536788127,0,1200,2025-08-30_07-29-42,598,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756531782,50.0,717600,60782.12470793724,81.94918751716614,598
+718800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 89806.981, 'num_steps_sampled': 718800, 'update_time_ms': 2.689, 'num_steps_trained': 718800, 'load_time_ms': 0.659, 'default': {'kl': 0.013243130408227444, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.225077152252197, 'total_loss': 18.396018981933594, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11471739411354065, 'vf_explained_var': 0.9890309572219849, 'vf_loss': 18.49732780456543}, 'grad_time_ms': 775.626}",3934253,60867.51846694946,-151.85888020618955,cda-server-6,24,-166.70198157607007,{},14376,10.157.146.6,{},-147.5689089852922,0,1200,2025-08-30_07-31-07,599,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756531867,50.0,718800,60867.51846694946,85.39375901222229,599
+720000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 91164.442, 'num_steps_sampled': 720000, 'update_time_ms': 2.654, 'num_steps_trained': 720000, 'load_time_ms': 0.636, 'default': {'kl': 0.01327840518206358, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.063553810119629, 'total_loss': 15.72727108001709, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10396900773048401, 'vf_explained_var': 0.9870481491088867, 'vf_loss': 15.81779670715332}, 'grad_time_ms': 776.607}",3934253,60990.73926758766,-151.6618704476836,cda-server-6,24,-166.70198157607007,{},14400,10.157.146.6,{},-147.3681517481075,0,1200,2025-08-30_07-33-10,600,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756531990,50.0,720000,60990.73926758766,123.22080063819885,600
+721200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 91809.072, 'num_steps_sampled': 721200, 'update_time_ms': 2.679, 'num_steps_trained': 721200, 'load_time_ms': 0.627, 'default': {'kl': 0.011526600457727909, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.281680583953857, 'total_loss': 18.19324493408203, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11207922548055649, 'vf_explained_var': 0.9853324890136719, 'vf_loss': 18.29365348815918}, 'grad_time_ms': 767.64}",3934253,61098.00409555435,-151.7562869870905,cda-server-6,24,-166.70198157607007,{},14424,10.157.146.6,{},-147.3681517481075,0,1200,2025-08-30_07-34-58,601,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756532098,50.0,721200,61098.00409555435,107.26482796669006,601
+722400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 90087.989, 'num_steps_sampled': 722400, 'update_time_ms': 2.721, 'num_steps_trained': 722400, 'load_time_ms': 0.617, 'default': {'kl': 0.01224952470511198, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.450062274932861, 'total_loss': 20.476715087890625, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11732758581638336, 'vf_explained_var': 0.9841304421424866, 'vf_loss': 20.581642150878906}, 'grad_time_ms': 783.345}",3934253,61192.40952897072,-152.16321712634695,cda-server-6,24,-166.70198157607007,{},14448,10.157.146.6,{},-147.3681517481075,0,1200,2025-08-30_07-36-32,602,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756532192,50.0,722400,61192.40952897072,94.40543341636658,602
+723600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 90615.528, 'num_steps_sampled': 723600, 'update_time_ms': 2.768, 'num_steps_trained': 723600, 'load_time_ms': 0.614, 'default': {'kl': 0.015451265498995781, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.148335933685303, 'total_loss': 11.77541732788086, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11515364050865173, 'vf_explained_var': 0.9903786182403564, 'vf_loss': 11.874926567077637}, 'grad_time_ms': 787.36}",3934253,61280.579090833664,-152.08872249419608,cda-server-6,24,-167.47636487522485,{},14472,10.157.146.6,{},-142.45120966923372,0,1200,2025-08-30_07-38-00,603,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756532280,50.0,723600,61280.579090833664,88.16956186294556,603
+724800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 87780.925, 'num_steps_sampled': 724800, 'update_time_ms': 2.845, 'num_steps_trained': 724800, 'load_time_ms': 0.615, 'default': {'kl': 0.014070438221096992, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.109999656677246, 'total_loss': 10.172300338745117, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12829262018203735, 'vf_explained_var': 0.9918432235717773, 'vf_loss': 10.286346435546875}, 'grad_time_ms': 784.586}",3934253,61349.66434311867,-152.43345766828455,cda-server-6,24,-167.9578979275503,{},14496,10.157.146.6,{},-142.45120966923372,0,1200,2025-08-30_07-39-10,604,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756532350,50.0,724800,61349.66434311867,69.08525228500366,604
+726000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 86887.291, 'num_steps_sampled': 726000, 'update_time_ms': 2.863, 'num_steps_trained': 726000, 'load_time_ms': 0.611, 'default': {'kl': 0.014225076884031296, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.7051849365234375, 'total_loss': 12.970050811767578, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12973900139331818, 'vf_explained_var': 0.9914548397064209, 'vf_loss': 13.085387229919434}, 'grad_time_ms': 790.49}",3934253,61424.110256910324,-152.20568940453862,cda-server-6,24,-167.9578979275503,{},14520,10.157.146.6,{},-141.36383384103294,0,1200,2025-08-30_07-40-24,605,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756532424,50.0,726000,61424.110256910324,74.4459137916565,605
+727200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 86061.442, 'num_steps_sampled': 727200, 'update_time_ms': 2.874, 'num_steps_trained': 727200, 'load_time_ms': 0.614, 'default': {'kl': 0.01197892241179943, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.155910015106201, 'total_loss': 16.024038314819336, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12084892392158508, 'vf_explained_var': 0.9875774383544922, 'vf_loss': 16.13275909423828}, 'grad_time_ms': 785.744}",3934253,61503.27158164978,-151.8809027765825,cda-server-6,24,-167.9578979275503,{},14544,10.157.146.6,{},-141.36383384103294,0,1200,2025-08-30_07-41-43,606,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756532503,50.0,727200,61503.27158164978,79.16132473945618,606
+728400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 89229.81, 'num_steps_sampled': 728400, 'update_time_ms': 2.72, 'num_steps_trained': 728400, 'load_time_ms': 0.609, 'default': {'kl': 0.012574922293424606, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.90366792678833, 'total_loss': 15.371590614318848, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11707106977701187, 'vf_explained_var': 0.9888659715652466, 'vf_loss': 15.475930213928223}, 'grad_time_ms': 786.113}",3934253,61600.42127537727,-151.49330662677073,cda-server-6,24,-167.9578979275503,{},14568,10.157.146.6,{},-141.36383384103294,0,1200,2025-08-30_07-43-20,607,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756532600,50.0,728400,61600.42127537727,97.14969372749329,607
+729600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 89682.331, 'num_steps_sampled': 729600, 'update_time_ms': 2.642, 'num_steps_trained': 729600, 'load_time_ms': 0.603, 'default': {'kl': 0.011613764800131321, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.039552688598633, 'total_loss': 18.735050201416016, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.09857099503278732, 'vf_explained_var': 0.9856353402137756, 'vf_loss': 18.821861267089844}, 'grad_time_ms': 775.753}",3934253,61686.7905664444,-151.40655440639014,cda-server-6,24,-165.37392545490812,{},14592,10.157.146.6,{},-141.36383384103294,0,1200,2025-08-30_07-44-47,608,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756532687,50.0,729600,61686.7905664444,86.36929106712341,608
+730800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92765.788, 'num_steps_sampled': 730800, 'update_time_ms': 2.676, 'num_steps_trained': 730800, 'load_time_ms': 0.608, 'default': {'kl': 0.01337195560336113, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.899675369262695, 'total_loss': 14.00875186920166, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10345722734928131, 'vf_explained_var': 0.9890311360359192, 'vf_loss': 14.09867000579834}, 'grad_time_ms': 771.632}",3934253,61802.97908568382,-151.37933562448453,cda-server-6,24,-165.37392545490812,{},14616,10.157.146.6,{},-141.66864327226546,0,1200,2025-08-30_07-46-43,609,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756532803,50.0,730800,61802.97908568382,116.18851923942566,609
+732000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 88623.712, 'num_steps_sampled': 732000, 'update_time_ms': 2.678, 'num_steps_trained': 732000, 'load_time_ms': 0.608, 'default': {'kl': 0.012054665014147758, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.8780741691589355, 'total_loss': 19.48150634765625, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10850019007921219, 'vf_explained_var': 0.9851784110069275, 'vf_loss': 19.577804565429688}, 'grad_time_ms': 774.118}",3934253,61884.80423927307,-151.41443393500313,cda-server-6,24,-161.8105244534275,{},14640,10.157.146.6,{},-141.63613739987613,0,1200,2025-08-30_07-48-05,610,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756532885,50.0,732000,61884.80423927307,81.82515358924866,610
+733200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 86950.836, 'num_steps_sampled': 733200, 'update_time_ms': 2.646, 'num_steps_trained': 733200, 'load_time_ms': 0.605, 'default': {'kl': 0.010721195489168167, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.638162136077881, 'total_loss': 11.81612491607666, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.09991056472063065, 'vf_explained_var': 0.9899557828903198, 'vf_loss': 11.905179977416992}, 'grad_time_ms': 787.336}",3934253,61975.47169351578,-151.543765128637,cda-server-6,24,-161.8105244534275,{},14664,10.157.146.6,{},-141.63613739987613,0,1200,2025-08-30_07-49-36,611,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756532976,50.0,733200,61975.47169351578,90.6674542427063,611
+734400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 85627.85, 'num_steps_sampled': 734400, 'update_time_ms': 2.679, 'num_steps_trained': 734400, 'load_time_ms': 0.603, 'default': {'kl': 0.013192672282457352, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.051061630249023, 'total_loss': 7.227845191955566, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1238882839679718, 'vf_explained_var': 0.9936915636062622, 'vf_loss': 7.338375568389893}, 'grad_time_ms': 784.194}",3934253,62056.61653780937,-151.44520802968128,cda-server-6,24,-161.8105244534275,{},14688,10.157.146.6,{},-141.63613739987613,0,1200,2025-08-30_07-50-57,612,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756533057,50.0,734400,62056.61653780937,81.14484429359436,612
+735600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 83882.7, 'num_steps_sampled': 735600, 'update_time_ms': 2.682, 'num_steps_trained': 735600, 'load_time_ms': 0.603, 'default': {'kl': 0.011554243043065071, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.660586833953857, 'total_loss': 10.637908935546875, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.09981474280357361, 'vf_explained_var': 0.9911506175994873, 'vf_loss': 10.726024627685547}, 'grad_time_ms': 786.349}",3934253,62127.3542406559,-151.22924727354598,cda-server-6,24,-159.87547682683376,{},14712,10.157.146.6,{},-139.68615046976356,0,1200,2025-08-30_07-52-07,613,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756533127,50.0,735600,62127.3542406559,70.7377028465271,613
+736800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 85648.444, 'num_steps_sampled': 736800, 'update_time_ms': 2.603, 'num_steps_trained': 736800, 'load_time_ms': 0.602, 'default': {'kl': 0.013088869862258434, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.049310207366943, 'total_loss': 12.22148609161377, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13127782940864563, 'vf_explained_var': 0.990606963634491, 'vf_loss': 12.33951187133789}, 'grad_time_ms': 792.797}",3934253,62214.16077184677,-151.31984968712513,cda-server-6,24,-162.72055261835348,{},14736,10.157.146.6,{},-139.68615046976356,0,1200,2025-08-30_07-53-34,614,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756533214,50.0,736800,62214.16077184677,86.80653119087219,614
+738000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 88803.749, 'num_steps_sampled': 738000, 'update_time_ms': 2.56, 'num_steps_trained': 738000, 'load_time_ms': 0.607, 'default': {'kl': 0.012645702809095383, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.928812026977539, 'total_loss': 10.566925048828125, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11527398228645325, 'vf_explained_var': 0.9913132190704346, 'vf_loss': 10.669394493103027}, 'grad_time_ms': 785.529}",3934253,62320.08752441406,-151.36229052843026,cda-server-6,24,-162.72055261835348,{},14760,10.157.146.6,{},-139.68615046976356,0,1200,2025-08-30_07-55-20,615,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756533320,50.0,738000,62320.08752441406,105.92675256729126,615
+739200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92349.761, 'num_steps_sampled': 739200, 'update_time_ms': 2.568, 'num_steps_trained': 739200, 'load_time_ms': 0.632, 'default': {'kl': 0.01190970279276371, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.102513790130615, 'total_loss': 9.939801216125488, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11454781144857407, 'vf_explained_var': 0.9917420148849487, 'vf_loss': 10.042292594909668}, 'grad_time_ms': 756.918}",3934253,62434.422278404236,-151.60210316670384,cda-server-6,24,-162.72055261835348,{},14784,10.157.146.6,{},-139.68615046976356,0,1200,2025-08-30_07-57-15,616,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756533435,50.0,739200,62434.422278404236,114.33475399017334,616
+740400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 91534.552, 'num_steps_sampled': 740400, 'update_time_ms': 2.573, 'num_steps_trained': 740400, 'load_time_ms': 0.633, 'default': {'kl': 0.013185751624405384, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.751632213592529, 'total_loss': 10.361977577209473, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1103510633111, 'vf_explained_var': 0.9916035532951355, 'vf_loss': 10.458977699279785}, 'grad_time_ms': 752.487}",3934253,62523.37660694122,-151.44191442940803,cda-server-6,24,-162.72055261835348,{},14808,10.157.146.6,{},-139.50587769520746,0,1200,2025-08-30_07-58-43,617,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756533523,50.0,740400,62523.37660694122,88.9543285369873,617
+741600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92642.693, 'num_steps_sampled': 741600, 'update_time_ms': 2.659, 'num_steps_trained': 741600, 'load_time_ms': 0.637, 'default': {'kl': 0.012119187042117119, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.803467750549316, 'total_loss': 8.588187217712402, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10707652568817139, 'vf_explained_var': 0.9933609366416931, 'vf_loss': 8.682992935180664}, 'grad_time_ms': 761.442}",3934253,62620.91872525215,-151.5307977259673,cda-server-6,24,-164.73135388198196,{},14832,10.157.146.6,{},-139.50587769520746,0,1200,2025-08-30_08-00-21,618,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756533621,50.0,741600,62620.91872525215,97.54211831092834,618
+742800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 91459.981, 'num_steps_sampled': 742800, 'update_time_ms': 2.627, 'num_steps_trained': 742800, 'load_time_ms': 0.658, 'default': {'kl': 0.013423633761703968, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.8948893547058105, 'total_loss': 12.06845474243164, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11208604276180267, 'vf_explained_var': 0.9899507164955139, 'vf_loss': 12.166949272155762}, 'grad_time_ms': 756.378}",3934253,62725.23010516167,-151.49945627137046,cda-server-6,24,-164.73135388198196,{},14856,10.157.146.6,{},-139.50587769520746,0,1200,2025-08-30_08-02-05,619,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756533725,50.0,742800,62725.23010516167,104.31137990951538,619
+744000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93561.719, 'num_steps_sampled': 744000, 'update_time_ms': 2.654, 'num_steps_trained': 744000, 'load_time_ms': 0.66, 'default': {'kl': 0.01322434563189745, 'cur_lr': 4.999999873689376e-05, 'entropy': 7.0396013259887695, 'total_loss': 13.521455764770508, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11213520169258118, 'vf_explained_var': 0.9895057678222656, 'vf_loss': 13.62020206451416}, 'grad_time_ms': 756.048}",3934253,62828.070397138596,-151.12993458929512,cda-server-6,24,-164.73135388198196,{},14880,10.157.146.6,{},-139.50587769520746,0,1200,2025-08-30_08-03-48,620,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756533828,50.0,744000,62828.070397138596,102.84029197692871,620
+745200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93545.314, 'num_steps_sampled': 745200, 'update_time_ms': 2.694, 'num_steps_trained': 745200, 'load_time_ms': 0.663, 'default': {'kl': 0.012171603739261627, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.692158222198486, 'total_loss': 6.1855010986328125, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11720164865255356, 'vf_explained_var': 0.9949302673339844, 'vf_loss': 6.290379047393799}, 'grad_time_ms': 752.822}",3934253,62918.541640520096,-151.14761090808298,cda-server-6,24,-164.73135388198196,{},14904,10.157.146.6,{},-140.95648442901637,0,1200,2025-08-30_08-05-19,621,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756533919,50.0,745200,62918.541640520096,90.47124338150024,621
+746400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93932.102, 'num_steps_sampled': 746400, 'update_time_ms': 2.631, 'num_steps_trained': 746400, 'load_time_ms': 0.661, 'default': {'kl': 0.013156522065401077, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.82674503326416, 'total_loss': 14.24813461303711, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11366318166255951, 'vf_explained_var': 0.9880519509315491, 'vf_loss': 14.34847640991211}, 'grad_time_ms': 751.554}",3934253,63003.54139351845,-151.18667908639287,cda-server-6,24,-164.73135388198196,{},14928,10.157.146.6,{},-139.44292993723454,0,1200,2025-08-30_08-06-44,622,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756534004,50.0,746400,63003.54139351845,84.99975299835205,622
+747600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 97851.94, 'num_steps_sampled': 747600, 'update_time_ms': 2.617, 'num_steps_trained': 747600, 'load_time_ms': 0.662, 'default': {'kl': 0.01315502543002367, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.802591323852539, 'total_loss': 7.619611740112305, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12538450956344604, 'vf_explained_var': 0.9940614700317383, 'vf_loss': 7.7316765785217285}, 'grad_time_ms': 752.627}",3934253,63113.488800525665,-151.09440919344684,cda-server-6,24,-164.42489643038954,{},14952,10.157.146.6,{},-139.44292993723454,0,1200,2025-08-30_08-08-34,623,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756534114,50.0,747600,63113.488800525665,109.94740700721741,623
+748800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 98750.161, 'num_steps_sampled': 748800, 'update_time_ms': 2.613, 'num_steps_trained': 748800, 'load_time_ms': 0.668, 'default': {'kl': 0.011096199974417686, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.810266494750977, 'total_loss': 11.71121597290039, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11723963171243668, 'vf_explained_var': 0.9900689721107483, 'vf_loss': 11.817220687866211}, 'grad_time_ms': 747.582}",3934253,63209.22741794586,-151.22068244121274,cda-server-6,24,-164.42489643038954,{},14976,10.157.146.6,{},-139.44292993723454,0,1200,2025-08-30_08-10-09,624,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756534209,50.0,748800,63209.22741794586,95.73861742019653,624
+750000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 100088.435, 'num_steps_sampled': 750000, 'update_time_ms': 2.645, 'num_steps_trained': 750000, 'load_time_ms': 0.668, 'default': {'kl': 0.010650668293237686, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.672452926635742, 'total_loss': 11.082969665527344, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.110077865421772, 'vf_explained_var': 0.9915443062782288, 'vf_loss': 11.18226432800293}, 'grad_time_ms': 757.859}",3934253,63328.63909459114,-151.40743659097222,cda-server-6,24,-164.42489643038954,{},15000,10.157.146.6,{},-139.44292993723454,0,1200,2025-08-30_08-12-09,625,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756534329,50.0,750000,63328.63909459114,119.41167664527893,625
+751200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 100166.948, 'num_steps_sampled': 751200, 'update_time_ms': 2.642, 'num_steps_trained': 751200, 'load_time_ms': 0.64, 'default': {'kl': 0.010317239910364151, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.8698625564575195, 'total_loss': 23.211606979370117, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10611388087272644, 'vf_explained_var': 0.9846295118331909, 'vf_loss': 23.307273864746094}, 'grad_time_ms': 773.449}",3934253,63443.91581988335,-151.66759500747776,cda-server-6,24,-163.6711908111485,{},15024,10.157.146.6,{},-144.90419533232387,0,1200,2025-08-30_08-14-04,626,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756534444,50.0,751200,63443.91581988335,115.27672529220581,626
+752400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 103369.137, 'num_steps_sampled': 752400, 'update_time_ms': 2.692, 'num_steps_trained': 752400, 'load_time_ms': 0.631, 'default': {'kl': 0.013233959674835205, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.71226692199707, 'total_loss': 15.275715827941895, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12592655420303345, 'vf_explained_var': 0.989207923412323, 'vf_loss': 15.38824462890625}, 'grad_time_ms': 744.545}",3934253,63564.602367162704,-151.4929936336819,cda-server-6,24,-163.6711908111485,{},15048,10.157.146.6,{},-139.5958818274101,0,1200,2025-08-30_08-16-05,627,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756534565,50.0,752400,63564.602367162704,120.68654727935791,627
+753600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 102544.866, 'num_steps_sampled': 753600, 'update_time_ms': 2.595, 'num_steps_trained': 753600, 'load_time_ms': 0.631, 'default': {'kl': 0.012792283669114113, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.9640302658081055, 'total_loss': 19.60162925720215, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11187508702278137, 'vf_explained_var': 0.9851052761077881, 'vf_loss': 19.700551986694336}, 'grad_time_ms': 736.195}",3934253,63653.81639122963,-151.669466943407,cda-server-6,24,-166.4776514019978,{},15072,10.157.146.6,{},-139.5958818274101,0,1200,2025-08-30_08-17-34,628,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756534654,50.0,753600,63653.81639122963,89.21402406692505,628
+754800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 101229.522, 'num_steps_sampled': 754800, 'update_time_ms': 2.535, 'num_steps_trained': 754800, 'load_time_ms': 0.607, 'default': {'kl': 0.010650486685335636, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.8045148849487305, 'total_loss': 13.200957298278809, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12197298556566238, 'vf_explained_var': 0.9894353151321411, 'vf_loss': 13.312145233154297}, 'grad_time_ms': 745.2}",3934253,63745.061317682266,-151.63851588055306,cda-server-6,24,-166.4776514019978,{},15096,10.157.146.6,{},-139.5958818274101,0,1200,2025-08-30_08-19-05,629,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756534745,50.0,754800,63745.061317682266,91.24492645263672,629
+756000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 100842.923, 'num_steps_sampled': 756000, 'update_time_ms': 2.551, 'num_steps_trained': 756000, 'load_time_ms': 0.609, 'default': {'kl': 0.010836427100002766, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.896514415740967, 'total_loss': 20.006378173828125, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11461702734231949, 'vf_explained_var': 0.9843152165412903, 'vf_loss': 20.110023498535156}, 'grad_time_ms': 724.066}",3934253,63843.82303214073,-151.58272379672468,cda-server-6,24,-166.4776514019978,{},15120,10.157.146.6,{},-139.5958818274101,0,1200,2025-08-30_08-20-44,630,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756534844,50.0,756000,63843.82303214073,98.76171445846558,630
+757200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 101480.276, 'num_steps_sampled': 757200, 'update_time_ms': 2.489, 'num_steps_trained': 757200, 'load_time_ms': 0.61, 'default': {'kl': 0.012623208574950695, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.808897972106934, 'total_loss': 15.05218505859375, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10825130343437195, 'vf_explained_var': 0.9871785640716553, 'vf_loss': 15.147655487060547}, 'grad_time_ms': 723.758}",3934253,63940.66364145279,-151.53546933288317,cda-server-6,24,-166.4776514019978,{},15144,10.157.146.6,{},-141.66980860902888,0,1200,2025-08-30_08-22-21,631,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756534941,50.0,757200,63940.66364145279,96.8406093120575,631
+758400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 100884.692, 'num_steps_sampled': 758400, 'update_time_ms': 2.45, 'num_steps_trained': 758400, 'load_time_ms': 0.64, 'default': {'kl': 0.01185892429202795, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.723665714263916, 'total_loss': 11.030750274658203, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10723483562469482, 'vf_explained_var': 0.9907653331756592, 'vf_loss': 11.125978469848633}, 'grad_time_ms': 717.742}",3934253,64019.64652919769,-151.36588981547644,cda-server-6,24,-158.80067592354442,{},15168,10.157.146.6,{},-141.66980860902888,0,1200,2025-08-30_08-23-40,632,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756535020,50.0,758400,64019.64652919769,78.98288774490356,632
+759600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 98671.08, 'num_steps_sampled': 759600, 'update_time_ms': 2.454, 'num_steps_trained': 759600, 'load_time_ms': 0.646, 'default': {'kl': 0.012938495725393295, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.659440994262695, 'total_loss': 13.684261322021484, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11048747599124908, 'vf_explained_var': 0.9885939359664917, 'vf_loss': 13.781648635864258}, 'grad_time_ms': 709.359}",3934253,64107.37429046631,-151.65608875992902,cda-server-6,24,-164.02453192680858,{},15192,10.157.146.6,{},-142.51797225071994,0,1200,2025-08-30_08-25-08,633,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756535108,50.0,759600,64107.37429046631,87.72776126861572,633
+760800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 98553.06, 'num_steps_sampled': 760800, 'update_time_ms': 2.495, 'num_steps_trained': 760800, 'load_time_ms': 0.636, 'default': {'kl': 0.012192122638225555, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.7756500244140625, 'total_loss': 8.989873886108398, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11869990825653076, 'vf_explained_var': 0.9927349090576172, 'vf_loss': 9.096230506896973}, 'grad_time_ms': 714.761}",3934253,64201.98797130585,-151.59236133085568,cda-server-6,24,-164.02453192680858,{},15216,10.157.146.6,{},-147.74976120463958,0,1200,2025-08-30_08-26-42,634,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756535202,50.0,760800,64201.98797130585,94.61368083953857,634
+762000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 99148.287, 'num_steps_sampled': 762000, 'update_time_ms': 2.504, 'num_steps_trained': 762000, 'load_time_ms': 0.633, 'default': {'kl': 0.012157265096902847, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.482801914215088, 'total_loss': 9.540739059448242, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11832654476165771, 'vf_explained_var': 0.9925000071525574, 'vf_loss': 9.646757125854492}, 'grad_time_ms': 704.804}",3934253,64327.25306916237,-151.5300366605358,cda-server-6,24,-164.02453192680858,{},15240,10.157.146.6,{},-145.57923844267256,0,1200,2025-08-30_08-28-48,635,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756535328,50.0,762000,64327.25306916237,125.2650978565216,635
+763200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94685.493, 'num_steps_sampled': 763200, 'update_time_ms': 2.451, 'num_steps_trained': 763200, 'load_time_ms': 0.639, 'default': {'kl': 0.011522796005010605, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.750631332397461, 'total_loss': 14.481304168701172, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11807700991630554, 'vf_explained_var': 0.9886033535003662, 'vf_loss': 14.587714195251465}, 'grad_time_ms': 725.526}",3934253,64398.10787272453,-151.58026426985145,cda-server-6,24,-166.86872080682477,{},15264,10.157.146.6,{},-135.66465642046649,0,1200,2025-08-30_08-29-59,636,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756535399,50.0,763200,64398.10787272453,70.8548035621643,636
+764400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 91776.876, 'num_steps_sampled': 764400, 'update_time_ms': 2.38, 'num_steps_trained': 764400, 'load_time_ms': 0.647, 'default': {'kl': 0.012320362962782383, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.598195552825928, 'total_loss': 13.947826385498047, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10195771604776382, 'vf_explained_var': 0.9885042905807495, 'vf_loss': 14.037308692932129}, 'grad_time_ms': 759.452}",3934253,64490.04764533043,-151.43070890493792,cda-server-6,24,-166.86872080682477,{},15288,10.157.146.6,{},-135.66465642046649,0,1200,2025-08-30_08-31-30,637,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756535490,50.0,764400,64490.04764533043,91.939772605896,637
+765600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94764.538, 'num_steps_sampled': 765600, 'update_time_ms': 2.38, 'num_steps_trained': 765600, 'load_time_ms': 0.642, 'default': {'kl': 0.01230735331773758, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.373239040374756, 'total_loss': 10.451953887939453, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11258859187364578, 'vf_explained_var': 0.991771399974823, 'vf_loss': 10.552081108093262}, 'grad_time_ms': 756.828}",3934253,64609.111683130264,-151.24051930669168,cda-server-6,24,-166.86872080682477,{},15312,10.157.146.6,{},-135.66465642046649,0,1200,2025-08-30_08-33-30,638,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756535610,50.0,765600,64609.111683130264,119.0640377998352,638
+766800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94182.613, 'num_steps_sampled': 766800, 'update_time_ms': 2.45, 'num_steps_trained': 766800, 'load_time_ms': 0.644, 'default': {'kl': 0.01306148525327444, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.652033805847168, 'total_loss': 11.028017044067383, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11691049486398697, 'vf_explained_var': 0.9918647408485413, 'vf_loss': 11.131702423095703}, 'grad_time_ms': 757.641}",3934253,64694.54643154144,-151.55331459979587,cda-server-6,24,-166.86872080682477,{},15336,10.157.146.6,{},-135.66465642046649,0,1200,2025-08-30_08-34-55,639,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756535695,50.0,766800,64694.54643154144,85.43474841117859,639
+768000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93906.791, 'num_steps_sampled': 768000, 'update_time_ms': 2.421, 'num_steps_trained': 768000, 'load_time_ms': 0.637, 'default': {'kl': 0.01332173403352499, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.823115825653076, 'total_loss': 10.166665077209473, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12121745198965073, 'vf_explained_var': 0.9914849400520325, 'vf_loss': 10.274394989013672}, 'grad_time_ms': 780.376}",3934253,64790.77740550041,-151.35041920654274,cda-server-6,24,-163.88208498966546,{},15360,10.157.146.6,{},-139.95728698257986,0,1200,2025-08-30_08-36-31,640,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756535791,50.0,768000,64790.77740550041,96.23097395896912,640
+769200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 96050.788, 'num_steps_sampled': 769200, 'update_time_ms': 2.539, 'num_steps_trained': 769200, 'load_time_ms': 0.635, 'default': {'kl': 0.012392531149089336, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.514824390411377, 'total_loss': 7.781041622161865, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12088058888912201, 'vf_explained_var': 0.9940726161003113, 'vf_loss': 7.889374732971191}, 'grad_time_ms': 778.119}",3934253,64909.03731918335,-150.96115774447182,cda-server-6,24,-163.88208498966546,{},15384,10.157.146.6,{},-139.95728698257986,0,1200,2025-08-30_08-38-30,641,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756535910,50.0,769200,64909.03731918335,118.25991368293762,641
+770400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 99890.034, 'num_steps_sampled': 770400, 'update_time_ms': 2.583, 'num_steps_trained': 770400, 'load_time_ms': 0.598, 'default': {'kl': 0.013650444336235523, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.526227951049805, 'total_loss': 17.587209701538086, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10258938372135162, 'vf_explained_var': 0.9885136485099792, 'vf_loss': 17.67597770690918}, 'grad_time_ms': 782.955}",3934253,65026.46193361282,-151.0723561949242,cda-server-6,24,-163.88208498966546,{},15408,10.157.146.6,{},-142.86892682381847,0,1200,2025-08-30_08-40-27,642,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756536027,50.0,770400,65026.46193361282,117.42461442947388,642
+771600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 101276.207, 'num_steps_sampled': 771600, 'update_time_ms': 2.602, 'num_steps_trained': 771600, 'load_time_ms': 0.601, 'default': {'kl': 0.013796964660286903, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.629274368286133, 'total_loss': 8.684115409851074, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12487681955099106, 'vf_explained_var': 0.9934294819831848, 'vf_loss': 8.795022964477539}, 'grad_time_ms': 778.614}",3934253,65128.00844717026,-151.140397750438,cda-server-6,24,-165.09382066300637,{},15432,10.157.146.6,{},-141.67803283239894,0,1200,2025-08-30_08-42-09,643,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756536129,50.0,771600,65128.00844717026,101.54651355743408,643
+772800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 98711.301, 'num_steps_sampled': 772800, 'update_time_ms': 2.525, 'num_steps_trained': 772800, 'load_time_ms': 0.606, 'default': {'kl': 0.011933304369449615, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.656396865844727, 'total_loss': 10.540224075317383, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12175066024065018, 'vf_explained_var': 0.9919617176055908, 'vf_loss': 10.649892807006836}, 'grad_time_ms': 774.185}",3934253,65196.92683053017,-151.43561850808175,cda-server-6,24,-165.09382066300637,{},15456,10.157.146.6,{},-141.67803283239894,0,1200,2025-08-30_08-43-17,644,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756536197,50.0,772800,65196.92683053017,68.91838335990906,644
+774000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94945.91, 'num_steps_sampled': 774000, 'update_time_ms': 2.5, 'num_steps_trained': 774000, 'load_time_ms': 0.622, 'default': {'kl': 0.012951802462339401, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.6081223487854, 'total_loss': 11.552346229553223, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12751348316669464, 'vf_explained_var': 0.9913455247879028, 'vf_loss': 11.66674518585205}, 'grad_time_ms': 776.546}",3934253,65284.56107521057,-151.55425962251707,cda-server-6,24,-165.09382066300637,{},15480,10.157.146.6,{},-139.4596296357344,0,1200,2025-08-30_08-44-45,645,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756536285,50.0,774000,65284.56107521057,87.63424468040466,645
+775200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 97436.799, 'num_steps_sampled': 775200, 'update_time_ms': 2.503, 'num_steps_trained': 775200, 'load_time_ms': 0.616, 'default': {'kl': 0.01226724311709404, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.506907939910889, 'total_loss': 11.610782623291016, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11629611998796463, 'vf_explained_var': 0.990890622138977, 'vf_loss': 11.7146577835083}, 'grad_time_ms': 776.118}",3934253,65380.31948065758,-151.60373988103257,cda-server-6,24,-165.09382066300637,{},15504,10.157.146.6,{},-139.4596296357344,0,1200,2025-08-30_08-46-21,646,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756536381,50.0,775200,65380.31948065758,95.75840544700623,646
+776400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 98575.832, 'num_steps_sampled': 776400, 'update_time_ms': 2.522, 'num_steps_trained': 776400, 'load_time_ms': 0.612, 'default': {'kl': 0.013378623872995377, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.336060047149658, 'total_loss': 6.171751022338867, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11012449115514755, 'vf_explained_var': 0.9950565695762634, 'vf_loss': 6.268329620361328}, 'grad_time_ms': 770.747}",3934253,65483.59596991539,-151.69143272461713,cda-server-6,24,-164.05178551685933,{},15528,10.157.146.6,{},-139.4596296357344,0,1200,2025-08-30_08-48-04,647,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756536484,50.0,776400,65483.59596991539,103.2764892578125,647
+777600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 98555.02, 'num_steps_sampled': 777600, 'update_time_ms': 2.559, 'num_steps_trained': 777600, 'load_time_ms': 0.62, 'default': {'kl': 0.012983070686459541, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.5856804847717285, 'total_loss': 7.57802677154541, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11344198137521744, 'vf_explained_var': 0.9940193891525269, 'vf_loss': 7.678323745727539}, 'grad_time_ms': 779.093}",3934253,65602.53673911095,-151.45275960946202,cda-server-6,24,-164.05178551685933,{},15552,10.157.146.6,{},-139.4596296357344,0,1200,2025-08-30_08-50-03,648,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756536603,50.0,777600,65602.53673911095,118.94076919555664,648
+778800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 99848.798, 'num_steps_sampled': 778800, 'update_time_ms': 2.549, 'num_steps_trained': 778800, 'load_time_ms': 0.617, 'default': {'kl': 0.011338386684656143, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.613151550292969, 'total_loss': 12.30356216430664, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10434151440858841, 'vf_explained_var': 0.9906575679779053, 'vf_loss': 12.39642333984375}, 'grad_time_ms': 763.442}",3934253,65700.7525241375,-151.33954524857702,cda-server-6,24,-164.05178551685933,{},15576,10.157.146.6,{},-141.78754009526514,0,1200,2025-08-30_08-51-41,649,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756536701,50.0,778800,65700.7525241375,98.2157850265503,649
+780000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 100781.345, 'num_steps_sampled': 780000, 'update_time_ms': 2.527, 'num_steps_trained': 780000, 'load_time_ms': 0.622, 'default': {'kl': 0.013383557088673115, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.252384185791016, 'total_loss': 9.627680778503418, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11854615807533264, 'vf_explained_var': 0.9921794533729553, 'vf_loss': 9.732675552368164}, 'grad_time_ms': 765.889}",3934253,65806.33282995224,-151.20732308144824,cda-server-6,24,-164.05178551685933,{},15600,10.157.146.6,{},-141.78754009526514,0,1200,2025-08-30_08-53-27,650,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756536807,50.0,780000,65806.33282995224,105.58030581474304,650
+781200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 98930.099, 'num_steps_sampled': 781200, 'update_time_ms': 2.402, 'num_steps_trained': 781200, 'load_time_ms': 0.621, 'default': {'kl': 0.012757916003465652, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.42793083190918, 'total_loss': 10.180928230285645, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1097208559513092, 'vf_explained_var': 0.99164879322052, 'vf_loss': 10.277731895446777}, 'grad_time_ms': 763.95}",3934253,65906.05948472023,-150.76600787910772,cda-server-6,24,-161.78974099861574,{},15624,10.157.146.6,{},-141.78754009526514,0,1200,2025-08-30_08-55-07,651,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756536907,50.0,781200,65906.05948472023,99.72665476799011,651
+782400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95713.624, 'num_steps_sampled': 782400, 'update_time_ms': 2.422, 'num_steps_trained': 782400, 'load_time_ms': 0.626, 'default': {'kl': 0.011412886902689934, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.393667221069336, 'total_loss': 19.408985137939453, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11556919664144516, 'vf_explained_var': 0.9846268892288208, 'vf_loss': 19.512996673583984}, 'grad_time_ms': 767.191}",3934253,65991.35186958313,-151.0416711676692,cda-server-6,24,-162.9573703906747,{},15648,10.157.146.6,{},-142.75696429563217,0,1200,2025-08-30_08-56-32,652,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756536992,50.0,782400,65991.35186958313,85.29238486289978,652
+783600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94709.237, 'num_steps_sampled': 783600, 'update_time_ms': 2.422, 'num_steps_trained': 783600, 'load_time_ms': 0.624, 'default': {'kl': 0.012586956843733788, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.694999694824219, 'total_loss': 8.255680084228516, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12337406724691391, 'vf_explained_var': 0.9931978583335876, 'vf_loss': 8.36630916595459}, 'grad_time_ms': 776.443}",3934253,66082.94680023193,-151.26611398768958,cda-server-6,24,-164.28386676227794,{},15672,10.157.146.6,{},-142.75696429563217,0,1200,2025-08-30_08-58-04,653,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756537084,50.0,783600,66082.94680023193,91.59493064880371,653
+784800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93480.09, 'num_steps_sampled': 784800, 'update_time_ms': 2.477, 'num_steps_trained': 784800, 'load_time_ms': 0.63, 'default': {'kl': 0.011648065410554409, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.439423561096191, 'total_loss': 6.595895290374756, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1131308376789093, 'vf_explained_var': 0.9945122599601746, 'vf_loss': 6.697232723236084}, 'grad_time_ms': 782.817}",3934253,66139.63784337044,-151.421835252641,cda-server-6,24,-166.17811680513293,{},15696,10.157.146.6,{},-142.75696429563217,0,1200,2025-08-30_08-59-00,654,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 'fcnet_activation': 
'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756537140,50.0,784800,66139.63784337044,56.69104313850403,654
+786000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93421.758, 'num_steps_sampled': 786000, 'update_time_ms': 2.448, 'num_steps_trained': 786000, 'load_time_ms': 0.617, 'default': {'kl': 0.012986731715500355, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.728307723999023, 'total_loss': 12.930699348449707, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12061098217964172, 'vf_explained_var': 0.9898444414138794, 'vf_loss': 13.038162231445312}, 'grad_time_ms': 757.218}",3934253,66226.43191671371,-151.55709439030414,cda-server-6,24,-166.17811680513293,{},15720,10.157.146.6,{},-141.89106674327246,0,1200,2025-08-30_09-00-27,655,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756537227,50.0,786000,66226.43191671371,86.79407334327698,655
+787200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 91060.581, 'num_steps_sampled': 787200, 'update_time_ms': 2.515, 'num_steps_trained': 787200, 'load_time_ms': 0.615, 'default': {'kl': 0.012636389583349228, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.8368353843688965, 'total_loss': 14.384733200073242, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13676336407661438, 'vf_explained_var': 0.9892227649688721, 'vf_loss': 14.50870132446289}, 'grad_time_ms': 754.775}",3934253,66298.55557537079,-151.40458382084128,cda-server-6,24,-166.17811680513293,{},15744,10.157.146.6,{},-141.54462597260832,0,1200,2025-08-30_09-01-39,656,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756537299,50.0,787200,66298.55557537079,72.12365865707397,656
+788400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 86574.116, 'num_steps_sampled': 788400, 'update_time_ms': 2.507, 'num_steps_trained': 788400, 'load_time_ms': 0.619, 'default': {'kl': 0.013919010758399963, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.683310508728027, 'total_loss': 8.733895301818848, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11779823899269104, 'vf_explained_var': 0.992950439453125, 'vf_loss': 8.837601661682129}, 'grad_time_ms': 761.237}",3934253,66357.03185558319,-151.25175747424433,cda-server-6,24,-166.17811680513293,{},15768,10.157.146.6,{},-141.54462597260832,0,1200,2025-08-30_09-02-38,657,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756537358,50.0,788400,66357.03185558319,58.476280212402344,657
+789600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 82118.274, 'num_steps_sampled': 789600, 'update_time_ms': 2.47, 'num_steps_trained': 789600, 'load_time_ms': 0.616, 'default': {'kl': 0.012269611470401287, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.552437782287598, 'total_loss': 12.102431297302246, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1195855513215065, 'vf_explained_var': 0.990055501461029, 'vf_loss': 12.209592819213867}, 'grad_time_ms': 766.057}",3934253,66431.46171355247,-151.0359636759865,cda-server-6,24,-163.4725483196033,{},15792,10.157.146.6,{},-141.54462597260832,0,1200,2025-08-30_09-03-52,658,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756537432,50.0,789600,66431.46171355247,74.42985796928406,658
+790800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 83903.982, 'num_steps_sampled': 790800, 'update_time_ms': 2.472, 'num_steps_trained': 790800, 'load_time_ms': 0.62, 'default': {'kl': 0.007937086746096611, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.613508701324463, 'total_loss': 45.66404724121094, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1019890308380127, 'vf_explained_var': 0.9751158952713013, 'vf_loss': 45.757999420166016}, 'grad_time_ms': 774.19}",3934253,66547.61732769012,-151.75462206130666,cda-server-6,24,-209.2673208160466,{},15816,10.157.146.6,{},-141.54462597260832,0,1200,2025-08-30_09-05-48,659,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756537548,50.0,790800,66547.61732769012,116.15561413764954,659
+792000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 82488.539, 'num_steps_sampled': 792000, 'update_time_ms': 2.46, 'num_steps_trained': 792000, 'load_time_ms': 0.622, 'default': {'kl': 0.012799741700291634, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.50419807434082, 'total_loss': 8.887038230895996, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1166180744767189, 'vf_explained_var': 0.9929201602935791, 'vf_loss': 8.990696907043457}, 'grad_time_ms': 780.299}",3934253,66639.10441493988,-152.13930837402907,cda-server-6,24,-209.2673208160466,{},15840,10.157.146.6,{},-141.75838048415804,0,1200,2025-08-30_09-07-20,660,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756537640,50.0,792000,66639.10441493988,91.48708724975586,660
+793200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 81544.232, 'num_steps_sampled': 793200, 'update_time_ms': 2.483, 'num_steps_trained': 793200, 'load_time_ms': 0.623, 'default': {'kl': 0.013526301831007004, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.520646572113037, 'total_loss': 6.78563117980957, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1247837170958519, 'vf_explained_var': 0.9944746494293213, 'vf_loss': 6.896719455718994}, 'grad_time_ms': 787.181}",3934253,66729.45693945885,-152.25459962815407,cda-server-6,24,-209.2673208160466,{},15864,10.157.146.6,{},-141.75838048415804,0,1200,2025-08-30_09-08-50,661,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756537730,50.0,793200,66729.45693945885,90.35252451896667,661
+794400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 81566.335, 'num_steps_sampled': 794400, 'update_time_ms': 2.481, 'num_steps_trained': 794400, 'load_time_ms': 0.624, 'default': {'kl': 0.009709502570331097, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.697580814361572, 'total_loss': 31.00704574584961, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10691169649362564, 'vf_explained_var': 0.9766644239425659, 'vf_loss': 31.1041259765625}, 'grad_time_ms': 784.331}",3934253,66814.94206523895,-152.15130963223993,cda-server-6,24,-209.2673208160466,{},15888,10.157.146.6,{},-141.75838048415804,0,1200,2025-08-30_09-10-16,662,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756537816,50.0,794400,66814.94206523895,85.48512578010559,662
+795600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 81604.465, 'num_steps_sampled': 795600, 'update_time_ms': 2.449, 'num_steps_trained': 795600, 'load_time_ms': 0.63, 'default': {'kl': 0.011459432542324066, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.440579891204834, 'total_loss': 9.810572624206543, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11177754402160645, 'vf_explained_var': 0.9926278591156006, 'vf_loss': 9.910746574401855}, 'grad_time_ms': 753.241}",3934253,66906.60725140572,-151.43457264234567,cda-server-6,24,-164.53212164937042,{},15912,10.157.146.6,{},-141.75838048415804,0,1200,2025-08-30_09-11-47,663,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756537907,50.0,795600,66906.60725140572,91.6651861667633,663
+796800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 85209.039, 'num_steps_sampled': 796800, 'update_time_ms': 2.44, 'num_steps_trained': 796800, 'load_time_ms': 0.62, 'default': {'kl': 0.014888007193803787, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.557436466217041, 'total_loss': 7.459188461303711, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1355879008769989, 'vf_explained_var': 0.994841992855072, 'vf_loss': 7.579701900482178}, 'grad_time_ms': 754.921}",3934253,66999.36083936691,-151.34236627439955,cda-server-6,24,-164.53212164937042,{},15936,10.157.146.6,{},-142.46980255395553,0,1200,2025-08-30_09-13-20,664,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756538000,50.0,796800,66999.36083936691,92.7535879611969,664
+798000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 86016.806, 'num_steps_sampled': 798000, 'update_time_ms': 2.522, 'num_steps_trained': 798000, 'load_time_ms': 0.624, 'default': {'kl': 0.011667725630104542, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.511973857879639, 'total_loss': 17.17188262939453, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11145953088998795, 'vf_explained_var': 0.9879921674728394, 'vf_loss': 17.271528244018555}, 'grad_time_ms': 770.477}",3934253,67094.38917398453,-151.15047415516864,cda-server-6,24,-159.77875482028378,{},15960,10.157.146.6,{},-142.46980255395553,0,1200,2025-08-30_09-14-55,665,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756538095,50.0,798000,67094.38917398453,95.02833461761475,665
+799200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 87249.057, 'num_steps_sampled': 799200, 'update_time_ms': 2.486, 'num_steps_trained': 799200, 'load_time_ms': 0.627, 'default': {'kl': 0.012102197855710983, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.43237829208374, 'total_loss': 7.126491069793701, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11670617014169693, 'vf_explained_var': 0.994476318359375, 'vf_loss': 7.23094367980957}, 'grad_time_ms': 770.622}",3934253,67178.83634185791,-151.30152287858394,cda-server-6,24,-159.77875482028378,{},15984,10.157.146.6,{},-142.46980255395553,0,1200,2025-08-30_09-16-20,666,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756538180,50.0,799200,67178.83634185791,84.44716787338257,666
+800400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 91353.758, 'num_steps_sampled': 800400, 'update_time_ms': 2.512, 'num_steps_trained': 800400, 'load_time_ms': 0.628, 'default': {'kl': 0.011202414520084858, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.938111782073975, 'total_loss': 28.343528747558594, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11333584785461426, 'vf_explained_var': 0.9814093112945557, 'vf_loss': 28.44552230834961}, 'grad_time_ms': 744.531}",3934253,67278.0993475914,-151.50161338262103,cda-server-6,24,-165.1941954418082,{},16008,10.157.146.6,{},-142.46980255395553,0,1200,2025-08-30_09-17-59,667,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756538279,50.0,800400,67278.0993475914,99.26300573348999,667
+801600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94170.663, 'num_steps_sampled': 801600, 'update_time_ms': 2.597, 'num_steps_trained': 801600, 'load_time_ms': 0.64, 'default': {'kl': 0.013727385550737381, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.836697578430176, 'total_loss': 8.947104454040527, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.13056008517742157, 'vf_explained_var': 0.9923450946807861, 'vf_loss': 9.063766479492188}, 'grad_time_ms': 728.366}",3934253,67380.53780794144,-151.21565188693177,cda-server-6,24,-165.1941954418082,{},16032,10.157.146.6,{},-142.47529502612474,0,1200,2025-08-30_09-19-41,668,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756538381,50.0,801600,67380.53780794144,102.43846035003662,668
+802800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93225.319, 'num_steps_sampled': 802800, 'update_time_ms': 2.595, 'num_steps_trained': 802800, 'load_time_ms': 0.641, 'default': {'kl': 0.012475317344069481, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.533681392669678, 'total_loss': 8.152522087097168, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11396972090005875, 'vf_explained_var': 0.9932054877281189, 'vf_loss': 8.253859519958496}, 'grad_time_ms': 722.125}",3934253,67487.17651033401,-151.30825228574636,cda-server-6,24,-165.1941954418082,{},16056,10.157.146.6,{},-142.47529502612474,0,1200,2025-08-30_09-21-28,669,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756538488,50.0,802800,67487.17651033401,106.63870239257812,669
+804000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93995.465, 'num_steps_sampled': 804000, 'update_time_ms': 2.617, 'num_steps_trained': 804000, 'load_time_ms': 0.64, 'default': {'kl': 0.012943493202328682, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.564505100250244, 'total_loss': 6.670261383056641, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11613664031028748, 'vf_explained_var': 0.9946820139884949, 'vf_loss': 6.773292541503906}, 'grad_time_ms': 719.747}",3934253,67586.34124970436,-151.18661510618227,cda-server-6,24,-165.1941954418082,{},16080,10.157.146.6,{},-142.47529502612474,0,1200,2025-08-30_09-23-07,670,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756538587,50.0,804000,67586.34124970436,99.16473937034607,670
+805200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94262.434, 'num_steps_sampled': 805200, 'update_time_ms': 2.627, 'num_steps_trained': 805200, 'load_time_ms': 0.641, 'default': {'kl': 0.01226672250777483, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.056154727935791, 'total_loss': 8.908513069152832, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12133461982011795, 'vf_explained_var': 0.9923565983772278, 'vf_loss': 9.017428398132324}, 'grad_time_ms': 718.622}",3934253,67679.35307192802,-151.23372935764777,cda-server-6,24,-165.1941954418082,{},16104,10.157.146.6,{},-142.47529502612474,0,1200,2025-08-30_09-24-40,671,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756538680,50.0,805200,67679.35307192802,93.01182222366333,671
+806400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95903.582, 'num_steps_sampled': 806400, 'update_time_ms': 2.629, 'num_steps_trained': 806400, 'load_time_ms': 0.64, 'default': {'kl': 0.013150524348020554, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.534552574157715, 'total_loss': 10.642876625061035, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12220504879951477, 'vf_explained_var': 0.9922248125076294, 'vf_loss': 10.751766204833984}, 'grad_time_ms': 725.111}",3934253,67781.31496477127,-151.34883223728843,cda-server-6,24,-163.48883436689144,{},16128,10.157.146.6,{},-144.58835193512377,0,1200,2025-08-30_09-26-22,672,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756538782,50.0,806400,67781.31496477127,101.96189284324646,672
+807600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 96976.531, 'num_steps_sampled': 807600, 'update_time_ms': 2.639, 'num_steps_trained': 807600, 'load_time_ms': 0.628, 'default': {'kl': 0.012908346019685268, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.433506011962891, 'total_loss': 11.394515037536621, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11172091960906982, 'vf_explained_var': 0.990711510181427, 'vf_loss': 11.493165969848633}, 'grad_time_ms': 745.565}",3934253,67883.91454315186,-151.22890771549737,cda-server-6,24,-163.48883436689144,{},16152,10.157.146.6,{},-139.43140026304368,0,1200,2025-08-30_09-28-05,673,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756538885,50.0,807600,67883.91454315186,102.59957838058472,673
+808800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 98556.756, 'num_steps_sampled': 808800, 'update_time_ms': 2.654, 'num_steps_trained': 808800, 'load_time_ms': 0.634, 'default': {'kl': 0.013200155459344387, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.696569442749023, 'total_loss': 7.806126117706299, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12211307138204575, 'vf_explained_var': 0.9940935969352722, 'vf_loss': 7.9148736000061035}, 'grad_time_ms': 741.058}",3934253,67992.4255001545,-151.68195875737942,cda-server-6,24,-164.95892603061685,{},16176,10.157.146.6,{},-139.43140026304368,0,1200,2025-08-30_09-29-53,674,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756538993,50.0,808800,67992.4255001545,108.51095700263977,674
+810000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 100854.048, 'num_steps_sampled': 810000, 'update_time_ms': 2.611, 'num_steps_trained': 810000, 'load_time_ms': 0.628, 'default': {'kl': 0.011847835965454578, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.6236677169799805, 'total_loss': 12.153740882873535, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11479135602712631, 'vf_explained_var': 0.9907311201095581, 'vf_loss': 12.256536483764648}, 'grad_time_ms': 750.617}",3934253,68110.52215981483,-151.57957172871915,cda-server-6,24,-164.95892603061685,{},16200,10.157.146.6,{},-139.43140026304368,0,1200,2025-08-30_09-31-52,675,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756539112,50.0,810000,68110.52215981483,118.09665966033936,675
+811200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 102943.044, 'num_steps_sampled': 811200, 'update_time_ms': 2.666, 'num_steps_trained': 811200, 'load_time_ms': 0.647, 'default': {'kl': 0.012600626796483994, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.64918851852417, 'total_loss': 16.147125244140625, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.14403095841407776, 'vf_explained_var': 0.98751300573349, 'vf_loss': 16.278398513793945}, 'grad_time_ms': 744.666}",3934253,68215.8010263443,-151.69943842256066,cda-server-6,24,-168.19080211933337,{},16224,10.157.146.6,{},-139.43140026304368,0,1200,2025-08-30_09-33-37,676,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756539217,50.0,811200,68215.8010263443,105.27886652946472,676
+812400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 101904.919, 'num_steps_sampled': 812400, 'update_time_ms': 2.604, 'num_steps_trained': 812400, 'load_time_ms': 0.647, 'default': {'kl': 0.012451926246285439, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.550738334655762, 'total_loss': 8.950202941894531, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11671951413154602, 'vf_explained_var': 0.9930202960968018, 'vf_loss': 9.054315567016602}, 'grad_time_ms': 767.029}",3934253,68304.90540742874,-151.9255237656048,cda-server-6,24,-168.19080211933337,{},16248,10.157.146.6,{},-141.6557397808477,0,1200,2025-08-30_09-35-06,677,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756539306,50.0,812400,68304.90540742874,89.10438108444214,677
+813600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 100236.082, 'num_steps_sampled': 813600, 'update_time_ms': 2.596, 'num_steps_trained': 813600, 'load_time_ms': 0.634, 'default': {'kl': 0.011645686812698841, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.426385879516602, 'total_loss': 25.895009994506836, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10874418914318085, 'vf_explained_var': 0.9853691458702087, 'vf_loss': 25.991962432861328}, 'grad_time_ms': 781.1}",3934253,68390.79599404335,-151.39686840499604,cda-server-6,24,-168.19080211933337,{},16272,10.157.146.6,{},-141.6557397808477,0,1200,2025-08-30_09-36-32,678,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756539392,50.0,813600,68390.79599404335,85.89058661460876,678
+814800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 100186.238, 'num_steps_sampled': 814800, 'update_time_ms': 2.597, 'num_steps_trained': 814800, 'load_time_ms': 0.633, 'default': {'kl': 0.012054681777954102, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.241293907165527, 'total_loss': 9.844844818115234, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10538104176521301, 'vf_explained_var': 0.9917342066764832, 'vf_loss': 9.938020706176758}, 'grad_time_ms': 786.948}",3934253,68496.99503946304,-151.08533412484206,cda-server-6,24,-168.19080211933337,{},16296,10.157.146.6,{},-141.74999265829365,0,1200,2025-08-30_09-38-18,679,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756539498,50.0,814800,68496.99503946304,106.199045419693,679
+816000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 99076.14, 'num_steps_sampled': 816000, 'update_time_ms': 2.567, 'num_steps_trained': 816000, 'load_time_ms': 0.629, 'default': {'kl': 0.012802320532500744, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.331047534942627, 'total_loss': 10.05265998840332, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11535504460334778, 'vf_explained_var': 0.9921693801879883, 'vf_loss': 10.155052185058594}, 'grad_time_ms': 781.558}",3934253,68585.0056154728,-150.79877807890264,cda-server-6,24,-162.53422148169122,{},16320,10.157.146.6,{},-141.74999265829365,0,1200,2025-08-30_09-39-46,680,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756539586,50.0,816000,68585.0056154728,88.01057600975037,680
+817200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 97339.498, 'num_steps_sampled': 817200, 'update_time_ms': 2.579, 'num_steps_trained': 817200, 'load_time_ms': 0.626, 'default': {'kl': 0.012418713420629501, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.361508846282959, 'total_loss': 8.864505767822266, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12446033954620361, 'vf_explained_var': 0.9929365515708923, 'vf_loss': 8.976390838623047}, 'grad_time_ms': 780.2}",3934253,68660.64840269089,-150.8476379489054,cda-server-6,24,-163.65163740307503,{},16344,10.157.146.6,{},-140.52325129365028,0,1200,2025-08-30_09-41-02,681,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756539662,50.0,817200,68660.64840269089,75.64278721809387,681
+818400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95654.015, 'num_steps_sampled': 818400, 'update_time_ms': 2.58, 'num_steps_trained': 818400, 'load_time_ms': 0.625, 'default': {'kl': 0.011963529512286186, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.352933883666992, 'total_loss': 14.166751861572266, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.12253758311271667, 'vf_explained_var': 0.9886897206306458, 'vf_loss': 14.277175903320312}, 'grad_time_ms': 779.43}",3934253,68745.74711084366,-150.81457327193596,cda-server-6,24,-163.87497421953273,{},16368,10.157.146.6,{},-140.52325129365028,0,1200,2025-08-30_09-42-27,682,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756539747,50.0,818400,68745.74711084366,85.098708152771,682
+819600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 96394.793, 'num_steps_sampled': 819600, 'update_time_ms': 2.6, 'num_steps_trained': 819600, 'load_time_ms': 0.642, 'default': {'kl': 0.011911649256944656, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.103545188903809, 'total_loss': 9.033626556396484, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1012423187494278, 'vf_explained_var': 0.9925553202629089, 'vf_loss': 9.122809410095215}, 'grad_time_ms': 785.224}",3934253,68855.81179380417,-150.8460074407685,cda-server-6,24,-163.87497421953273,{},16392,10.157.146.6,{},-140.52325129365028,0,1200,2025-08-30_09-44-17,683,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756539857,50.0,819600,68855.81179380417,110.06468296051025,683
+820800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 96089.793, 'num_steps_sampled': 820800, 'update_time_ms': 2.585, 'num_steps_trained': 820800, 'load_time_ms': 0.646, 'default': {'kl': 0.01197890192270279, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.369986534118652, 'total_loss': 9.048433303833008, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11695381999015808, 'vf_explained_var': 0.9941478371620178, 'vf_loss': 9.15325927734375}, 'grad_time_ms': 779.6}",3934253,68961.21634984016,-151.20101435769047,cda-server-6,24,-163.87497421953273,{},16416,10.157.146.6,{},-140.52325129365028,0,1200,2025-08-30_09-46-02,684,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756539962,50.0,820800,68961.21634984016,105.40455603599548,684
+822000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 95277.527, 'num_steps_sampled': 822000, 'update_time_ms': 2.612, 'num_steps_trained': 822000, 'load_time_ms': 0.643, 'default': {'kl': 0.012461802922189236, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.272339344024658, 'total_loss': 5.204405784606934, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11976167559623718, 'vf_explained_var': 0.9955686926841736, 'vf_loss': 5.311550617218018}, 'grad_time_ms': 781.68}",3934253,69071.21107387543,-151.26061614676337,cda-server-6,24,-163.87497421953273,{},16440,10.157.146.6,{},-141.8100592191962,0,1200,2025-08-30_09-47-52,685,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756540072,50.0,822000,69071.21107387543,109.99472403526306,685
+823200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 92447.797, 'num_steps_sampled': 823200, 'update_time_ms': 2.543, 'num_steps_trained': 823200, 'load_time_ms': 0.629, 'default': {'kl': 0.01256785448640585, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.144095420837402, 'total_loss': 8.30500602722168, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11857112497091293, 'vf_explained_var': 0.9932448863983154, 'vf_loss': 8.410853385925293}, 'grad_time_ms': 787.828}",3934253,69148.25281834602,-151.32705938647703,cda-server-6,24,-163.87497421953273,{},16464,10.157.146.6,{},-142.57374849668588,0,1200,2025-08-30_09-49-09,686,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756540149,50.0,823200,69148.25281834602,77.04174447059631,686
+824400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 86656.142, 'num_steps_sampled': 824400, 'update_time_ms': 2.651, 'num_steps_trained': 824400, 'load_time_ms': 0.625, 'default': {'kl': 0.013023233972489834, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.194946765899658, 'total_loss': 7.450124740600586, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11979203671216965, 'vf_explained_var': 0.9941511154174805, 'vf_loss': 7.556732177734375}, 'grad_time_ms': 776.974}",3934253,69179.33352923393,-151.38487842314197,cda-server-6,24,-162.7455905758164,{},16488,10.157.146.6,{},-142.57374849668588,0,1200,2025-08-30_09-49-41,687,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756540181,50.0,824400,69179.33352923393,31.080710887908936,687
+825600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 87077.504, 'num_steps_sampled': 825600, 'update_time_ms': 2.579, 'num_steps_trained': 825600, 'load_time_ms': 0.655, 'default': {'kl': 0.012215284630656242, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.197381496429443, 'total_loss': 6.406160354614258, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11759992688894272, 'vf_explained_var': 0.9950463771820068, 'vf_loss': 6.511392116546631}, 'grad_time_ms': 767.738}",3934253,69269.34405446053,-151.17998303169554,cda-server-6,24,-161.4520066765237,{},16512,10.157.146.6,{},-142.57374849668588,0,1200,2025-08-30_09-51-11,688,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756540271,50.0,825600,69269.34405446053,90.01052522659302,688
+826800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 85577.639, 'num_steps_sampled': 826800, 'update_time_ms': 2.554, 'num_steps_trained': 826800, 'load_time_ms': 0.653, 'default': {'kl': 0.012390440329909325, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.316177845001221, 'total_loss': 18.18695640563965, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11650380492210388, 'vf_explained_var': 0.9914601445198059, 'vf_loss': 18.290916442871094}, 'grad_time_ms': 778.028}",3934253,69360.6469142437,-151.49063416896567,cda-server-6,24,-165.0456315643184,{},16536,10.157.146.6,{},-142.57374849668588,0,1200,2025-08-30_09-52-42,689,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756540362,50.0,826800,69360.6469142437,91.30285978317261,689
+828000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 86750.332, 'num_steps_sampled': 828000, 'update_time_ms': 2.59, 'num_steps_trained': 828000, 'load_time_ms': 0.658, 'default': {'kl': 0.011441261507570744, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.2853240966796875, 'total_loss': 22.910051345825195, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11486013978719711, 'vf_explained_var': 0.9828611016273499, 'vf_loss': 23.013328552246094}, 'grad_time_ms': 770.52}",3934253,69460.30961084366,-151.7659706244583,cda-server-6,24,-180.0312363975615,{},16560,10.157.146.6,{},-147.80484426119497,0,1200,2025-08-30_09-54-22,690,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756540462,50.0,828000,69460.30961084366,99.66269659996033,690
+829200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 90554.163, 'num_steps_sampled': 829200, 'update_time_ms': 2.575, 'num_steps_trained': 829200, 'load_time_ms': 0.659, 'default': {'kl': 0.012882929295301437, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.380704879760742, 'total_loss': 14.827506065368652, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11432640999555588, 'vf_explained_var': 0.9901928305625916, 'vf_loss': 14.928787231445312}, 'grad_time_ms': 765.682}",3934253,69573.93123292923,-151.96510791490692,cda-server-6,24,-180.0312363975615,{},16584,10.157.146.6,{},-146.67734841385254,0,1200,2025-08-30_09-56-15,691,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756540575,50.0,829200,69573.93123292923,113.62162208557129,691
+830400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94301.75, 'num_steps_sampled': 830400, 'update_time_ms': 2.544, 'num_steps_trained': 830400, 'load_time_ms': 0.653, 'default': {'kl': 0.013330933637917042, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.488500595092773, 'total_loss': 25.625591278076172, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11284230649471283, 'vf_explained_var': 0.9840977191925049, 'vf_loss': 25.724937438964844}, 'grad_time_ms': 729.438}",3934253,69696.14327788353,-152.45992568128085,cda-server-6,24,-180.0312363975615,{},16608,10.157.146.6,{},-146.67734841385254,0,1200,2025-08-30_09-58-17,692,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756540697,50.0,830400,69696.14327788353,122.21204495429993,692
+831600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 93609.137, 'num_steps_sampled': 831600, 'update_time_ms': 2.539, 'num_steps_trained': 831600, 'load_time_ms': 0.641, 'default': {'kl': 0.013981933705508709, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.311878681182861, 'total_loss': 6.803781986236572, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11857353150844574, 'vf_explained_var': 0.9953944087028503, 'vf_loss': 6.908199310302734}, 'grad_time_ms': 718.791}",3934253,69799.17583036423,-152.2516817528202,cda-server-6,24,-180.0312363975615,{},16632,10.157.146.6,{},-146.67734841385254,0,1200,2025-08-30_10-00-01,693,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756540801,50.0,831600,69799.17583036423,103.03255248069763,693
+832800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 96258.397, 'num_steps_sampled': 832800, 'update_time_ms': 2.525, 'num_steps_trained': 832800, 'load_time_ms': 0.646, 'default': {'kl': 0.011710396967828274, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.321974754333496, 'total_loss': 7.118447303771973, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11277797818183899, 'vf_explained_var': 0.9942653179168701, 'vf_loss': 7.219368934631348}, 'grad_time_ms': 704.768}",3934253,69930.9327340126,-152.43928058541786,cda-server-6,24,-180.0312363975615,{},16656,10.157.146.6,{},-146.67734841385254,0,1200,2025-08-30_10-02-12,694,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756540932,50.0,832800,69930.9327340126,131.75690364837646,694
+834000,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 94101.325, 'num_steps_sampled': 834000, 'update_time_ms': 2.504, 'num_steps_trained': 834000, 'load_time_ms': 0.643, 'default': {'kl': 0.009334594011306763, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.432497501373291, 'total_loss': 42.389686584472656, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.0987766683101654, 'vf_explained_var': 0.9710657596588135, 'vf_loss': 42.479007720947266}, 'grad_time_ms': 674.781}",3934253,70019.05616569519,-152.5862841135043,cda-server-6,24,-215.12316385063616,{},16680,10.157.146.6,{},-139.70947457469018,0,1200,2025-08-30_10-03-40,695,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756541020,50.0,834000,70019.05616569519,88.12343168258667,695
+835200,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 97703.421, 'num_steps_sampled': 835200, 'update_time_ms': 2.461, 'num_steps_trained': 835200, 'load_time_ms': 0.642, 'default': {'kl': 0.010365894995629787, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.277856349945068, 'total_loss': 31.63107681274414, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.10792961716651917, 'vf_explained_var': 0.9769017696380615, 'vf_loss': 31.72850799560547}, 'grad_time_ms': 674.391}",3934253,70132.1143321991,-152.69711119289224,cda-server-6,24,-215.12316385063616,{},16704,10.157.146.6,{},-139.70947457469018,0,1200,2025-08-30_10-05-33,696,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756541133,50.0,835200,70132.1143321991,113.05816650390625,696
+836400,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 100679.433, 'num_steps_sampled': 836400, 'update_time_ms': 2.366, 'num_steps_trained': 836400, 'load_time_ms': 0.642, 'default': {'kl': 0.012433375231921673, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.009059429168701, 'total_loss': 14.474651336669922, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11254024505615234, 'vf_explained_var': 0.9880602359771729, 'vf_loss': 14.574604034423828}, 'grad_time_ms': 689.482}",3934253,70193.10514330864,-152.4112371065005,cda-server-6,24,-215.12316385063616,{},16728,10.157.146.6,{},-139.70947457469018,0,1200,2025-08-30_10-06-34,697,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756541194,50.0,836400,70193.10514330864,60.99081110954285,697
+837600,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 102270.979, 'num_steps_sampled': 837600, 'update_time_ms': 2.433, 'num_steps_trained': 837600, 'load_time_ms': 0.613, 'default': {'kl': 0.012350209057331085, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.168177127838135, 'total_loss': 10.318385124206543, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1254514455795288, 'vf_explained_var': 0.9916518330574036, 'vf_loss': 10.431331634521484}, 'grad_time_ms': 692.615}",3934253,70299.0635895729,-152.33370792697596,cda-server-6,24,-215.12316385063616,{},16752,10.157.146.6,{},-139.70947457469018,0,1200,2025-08-30_10-08-20,698,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756541300,50.0,837600,70299.0635895729,105.95844626426697,698
+838800,28bf8d7c89244732ac12356321e5be58,False,"{'sample_time_ms': 101079.944, 'num_steps_sampled': 838800, 'update_time_ms': 2.454, 'num_steps_trained': 838800, 'load_time_ms': 0.619, 'default': {'kl': 0.012040354311466217, 'cur_lr': 4.999999873689376e-05, 'entropy': 6.06229305267334, 'total_loss': 10.639694213867188, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.11114271730184555, 'vf_explained_var': 0.991613507270813, 'vf_loss': 10.738645553588867}, 'grad_time_ms': 687.555}",3934253,70378.40663385391,-152.0317558586114,cda-server-6,24,-186.7766576967727,{},16776,10.157.146.6,{},-141.7210758642898,0,1200,2025-08-30_10-09-40,699,"{'compress_observations': False, 'use_gae': True, 'num_envs_per_worker': 1, 'straggler_mitigation': False, 'input_evaluation': None, 'entropy_coeff': 0.0, 'output_max_file_size': 67108864, 'vf_share_layers': False, 'env_config': {'generalize': True, 'run_valid': False}, 'batch_mode': 'truncate_episodes', 'vf_loss_coeff': 1.0, 'observation_filter': 'MeanStdFilter', 'preprocessor_pref': 'deepmind', 'multiagent': {'policy_graphs': {}, 'policy_mapping_fn': None, 'policies_to_train': None}, 'lambda': 1.0, 'gamma': 0.99, 'num_cpus_for_driver': 1, 'log_level': 'INFO', 'num_cpus_per_worker': 1, 'clip_actions': True, 'synchronize_filters': True, 'sample_batch_size': 200, 'monitor': False, 'sample_async': False, 'num_workers': 6, 'num_sgd_iter': 30, 'postprocess_inputs': False, 'num_gpus_per_worker': 0, 'clip_param': 0.3, 'env': 'LEDRO_D_FC', 'vf_clip_param': 10.0, 'grad_clip': None, 'lr': 5e-05, 'simple_optimizer': False, 'kl_target': 0.01, 'optimizer': {}, 'tf_session_args': {'log_device_placement': False, 'gpu_options': {'allow_growth': True}, 'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'sgd_minibatch_size': 128, 'horizon': 50, 'model': {'fcnet_hiddens': [128, 128, 128], 'grayscale': False, 'zero_mean': True, 'custom_preprocessor': None, 
'fcnet_activation': 'tanh', 'framestack': True, 'free_log_std': False, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, 'dim': 84, 'custom_options': {}, 'use_lstm': False, 'squash_to_range': False, 'conv_filters': None, 'custom_model': None}, 'lr_schedule': None, 'local_evaluator_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'input': 'sampler', 'output': None, 'kl_coeff': 0.2, 'clip_rewards': None, 'collect_metrics_timeout': 180, 'callbacks': {'on_train_result': None, 'on_sample_end': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'num_gpus': 0, 'custom_resources_per_worker': {}, 'train_batch_size': 1200, 'output_compress_columns': ['obs', 'new_obs']}",1756541380,50.0,838800,70378.40663385391,79.34304428100586,699
diff --git a/experiments/ledro_d_fc_7nm_run4_horizon_50_range_10_400_400_start_33/PPO_LEDRO_D_FC_0_2025-08-29_14-36-17yoqm6ky6/result.json b/experiments/ledro_d_fc_7nm_run4_horizon_50_range_10_400_400_start_33/PPO_LEDRO_D_FC_0_2025-08-29_14-36-17yoqm6ky6/result.json
new file mode 100644
index 0000000..b1ef045
--- /dev/null
+++ b/experiments/ledro_d_fc_7nm_run4_horizon_50_range_10_400_400_start_33/PPO_LEDRO_D_FC_0_2025-08-29_14-36-17yoqm6ky6/result.json
@@ -0,0 +1,699 @@
+{"timesteps_total": 1200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 238752.488, "num_steps_sampled": 1200, "update_time_ms": 1494.553, "num_steps_trained": 1200, "load_time_ms": 91.451, "default": {"kl": 0.02345726452767849, "cur_lr": 4.999999873689376e-05, "entropy": 18.652944564819336, "total_loss": 13102.5712890625, "cur_kl_coeff": 0.20000000298023224, "policy_loss": -0.1253841370344162, "vf_explained_var": -0.020561866462230682, "vf_loss": 13102.69140625}, "grad_time_ms": 1736.076}, "pid": 3934253, "time_total_s": 242.16078996658325, "episode_reward_mean": -230.22058282191497, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -248.50570683339015, "policy_reward_mean": {}, "episodes_total": 24, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -189.17222398744005, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_14-40-32", "training_iteration": 1, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756471232, "episode_len_mean": 50.0, "timesteps_since_restore": 1200, "time_since_restore": 242.16078996658325, "time_this_iter_s": 242.16078996658325, "iterations_since_restore": 1}
+{"timesteps_total": 2400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 178035.505, "num_steps_sampled": 2400, "update_time_ms": 749.314, "num_steps_trained": 2400, "load_time_ms": 46.26, "default": {"kl": 0.022505946457386017, "cur_lr": 4.999999873689376e-05, "entropy": 18.62984848022461, "total_loss": 12515.01171875, "cur_kl_coeff": 0.30000001192092896, "policy_loss": -0.12131085991859436, "vf_explained_var": 0.046773672103881836, "vf_loss": 12515.125}, "grad_time_ms": 1248.081}, "pid": 3934253, "time_total_s": 360.25028228759766, "episode_reward_mean": -229.7708322779712, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -248.50570683339015, "policy_reward_mean": {}, "episodes_total": 48, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -189.17222398744005, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_14-42-30", "training_iteration": 2, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756471350, "episode_len_mean": 50.0, "timesteps_since_restore": 2400, "time_since_restore": 360.25028228759766, "time_this_iter_s": 118.0894923210144, "iterations_since_restore": 2}
+{"timesteps_total": 3600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 160903.856, "num_steps_sampled": 3600, "update_time_ms": 500.221, "num_steps_trained": 3600, "load_time_ms": 31.074, "default": {"kl": 0.021168239414691925, "cur_lr": 4.999999873689376e-05, "entropy": 18.610870361328125, "total_loss": 12362.8056640625, "cur_kl_coeff": 0.44999995827674866, "policy_loss": -0.12194083631038666, "vf_explained_var": 0.0496826171875, "vf_loss": 12362.91796875}, "grad_time_ms": 1005.561}, "pid": 3934253, "time_total_s": 487.41902899742126, "episode_reward_mean": -230.6458543464934, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -248.50570683339015, "policy_reward_mean": {}, "episodes_total": 72, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -189.17222398744005, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_14-44-37", "training_iteration": 3, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756471477, "episode_len_mean": 50.0, "timesteps_since_restore": 3600, "time_since_restore": 487.41902899742126, "time_this_iter_s": 127.16874670982361, "iterations_since_restore": 3}
+{"timesteps_total": 4800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 153296.846, "num_steps_sampled": 4800, "update_time_ms": 375.774, "num_steps_trained": 4800, "load_time_ms": 23.521, "default": {"kl": 0.020337438210844994, "cur_lr": 4.999999873689376e-05, "entropy": 18.58652687072754, "total_loss": 12319.80859375, "cur_kl_coeff": 0.675000011920929, "policy_loss": -0.12520265579223633, "vf_explained_var": 0.05312725529074669, "vf_loss": 12319.919921875}, "grad_time_ms": 927.15}, "pid": 3934253, "time_total_s": 618.5954301357269, "episode_reward_mean": -231.9126016253825, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -248.50570683339015, "policy_reward_mean": {}, "episodes_total": 96, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -189.17222398744005, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_14-46-49", "training_iteration": 4, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756471609, "episode_len_mean": 50.0, "timesteps_since_restore": 4800, "time_since_restore": 618.5954301357269, "time_this_iter_s": 131.17640113830566, "iterations_since_restore": 4}
+{"timesteps_total": 6000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 149198.611, "num_steps_sampled": 6000, "update_time_ms": 301.15, "num_steps_trained": 6000, "load_time_ms": 18.931, "default": {"kl": 0.017712781205773354, "cur_lr": 4.999999873689376e-05, "entropy": 18.564531326293945, "total_loss": 11199.1513671875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13221319019794464, "vf_explained_var": 0.030576281249523163, "vf_loss": 11199.263671875}, "grad_time_ms": 893.167}, "pid": 3934253, "time_total_s": 752.1666700839996, "episode_reward_mean": -232.0932859758354, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -248.10200411755505, "policy_reward_mean": {}, "episodes_total": 120, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -200.99987523969685, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_14-49-02", "training_iteration": 5, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756471742, "episode_len_mean": 50.0, "timesteps_since_restore": 6000, "time_since_restore": 752.1666700839996, "time_this_iter_s": 133.5712399482727, "iterations_since_restore": 5}
+{"timesteps_total": 7200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 146583.585, "num_steps_sampled": 7200, "update_time_ms": 251.309, "num_steps_trained": 7200, "load_time_ms": 15.871, "default": {"kl": 0.01696646213531494, "cur_lr": 4.999999873689376e-05, "entropy": 18.5582275390625, "total_loss": 11126.30859375, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12488171458244324, "vf_explained_var": 0.012747373431921005, "vf_loss": 11126.4169921875}, "grad_time_ms": 863.632}, "pid": 3934253, "time_total_s": 886.3993492126465, "episode_reward_mean": -233.2256954990561, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -248.10200411755505, "policy_reward_mean": {}, "episodes_total": 144, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -200.99987523969685, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_14-51-16", "training_iteration": 6, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756471876, "episode_len_mean": 50.0, "timesteps_since_restore": 7200, "time_since_restore": 886.3993492126465, "time_this_iter_s": 134.23267912864685, "iterations_since_restore": 6}
+{"timesteps_total": 8400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 143834.975, "num_steps_sampled": 8400, "update_time_ms": 215.706, "num_steps_trained": 8400, "load_time_ms": 13.737, "default": {"kl": 0.018511280417442322, "cur_lr": 4.999999873689376e-05, "entropy": 18.533920288085938, "total_loss": 10328.9599609375, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12375900149345398, "vf_explained_var": 0.007189598400145769, "vf_loss": 10329.0654296875}, "grad_time_ms": 849.341}, "pid": 3934253, "time_total_s": 1014.5144400596619, "episode_reward_mean": -232.3667402438889, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -248.10200411755505, "policy_reward_mean": {}, "episodes_total": 168, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -199.95306198707857, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_14-53-24", "training_iteration": 7, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756472004, "episode_len_mean": 50.0, "timesteps_since_restore": 8400, "time_since_restore": 1014.5144400596619, "time_this_iter_s": 128.11509084701538, "iterations_since_restore": 7}
+{"timesteps_total": 9600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 141122.885, "num_steps_sampled": 9600, "update_time_ms": 189.046, "num_steps_trained": 9600, "load_time_ms": 12.13, "default": {"kl": 0.016686219722032547, "cur_lr": 4.999999873689376e-05, "entropy": 18.514015197753906, "total_loss": 9768.44140625, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1354256272315979, "vf_explained_var": 0.004887203220278025, "vf_loss": 9768.5595703125}, "grad_time_ms": 837.745}, "pid": 3934253, "time_total_s": 1137.4176816940308, "episode_reward_mean": -230.78368277525772, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -247.03019267128656, "policy_reward_mean": {}, "episodes_total": 192, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -197.89664123792278, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_14-55-27", "training_iteration": 8, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756472127, "episode_len_mean": 50.0, "timesteps_since_restore": 9600, "time_since_restore": 1137.4176816940308, "time_this_iter_s": 122.9032416343689, "iterations_since_restore": 8}
+{"timesteps_total": 10800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 140244.999, "num_steps_sampled": 10800, "update_time_ms": 168.316, "num_steps_trained": 10800, "load_time_ms": 10.847, "default": {"kl": 0.016497083008289337, "cur_lr": 4.999999873689376e-05, "entropy": 18.499237060546875, "total_loss": 9278.640625, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1313558965921402, "vf_explained_var": 0.002922866027802229, "vf_loss": 9278.7548828125}, "grad_time_ms": 828.448}, "pid": 3934253, "time_total_s": 1271.4018051624298, "episode_reward_mean": -229.5842021622292, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -247.858779289968, "policy_reward_mean": {}, "episodes_total": 216, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -197.89664123792278, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_14-57-41", "training_iteration": 9, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756472261, "episode_len_mean": 50.0, "timesteps_since_restore": 10800, "time_since_restore": 1271.4018051624298, "time_this_iter_s": 133.98412346839905, "iterations_since_restore": 9}
+{"timesteps_total": 12000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 138991.608, "num_steps_sampled": 12000, "update_time_ms": 151.715, "num_steps_trained": 12000, "load_time_ms": 9.854, "default": {"kl": 0.018143020570278168, "cur_lr": 4.999999873689376e-05, "entropy": 18.48088264465332, "total_loss": 9191.5791015625, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13637499511241913, "vf_explained_var": 0.0037107665557414293, "vf_loss": 9191.697265625}, "grad_time_ms": 817.367}, "pid": 3934253, "time_total_s": 1399.8384637832642, "episode_reward_mean": -228.35528496800046, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -247.858779289968, "policy_reward_mean": {}, "episodes_total": 240, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -195.66306370400125, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_14-59-50", "training_iteration": 10, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756472390, "episode_len_mean": 50.0, "timesteps_since_restore": 12000, "time_since_restore": 1399.8384637832642, "time_this_iter_s": 128.43665862083435, "iterations_since_restore": 10}
+{"timesteps_total": 13200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 126993.623, "num_steps_sampled": 13200, "update_time_ms": 2.449, "num_steps_trained": 13200, "load_time_ms": 0.766, "default": {"kl": 0.016372594982385635, "cur_lr": 4.999999873689376e-05, "entropy": 18.44902229309082, "total_loss": 8664.150390625, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12930560111999512, "vf_explained_var": 0.014810138382017612, "vf_loss": 8664.2626953125}, "grad_time_ms": 693.016}, "pid": 3934253, "time_total_s": 1519.110630273819, "episode_reward_mean": -226.7992343391553, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -247.858779289968, "policy_reward_mean": {}, "episodes_total": 264, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -188.91056735653865, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_15-01-49", "training_iteration": 11, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756472509, "episode_len_mean": 50.0, "timesteps_since_restore": 13200, "time_since_restore": 1519.110630273819, "time_this_iter_s": 119.27216649055481, "iterations_since_restore": 11}
+{"timesteps_total": 14400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 126796.976, "num_steps_sampled": 14400, "update_time_ms": 2.254, "num_steps_trained": 14400, "load_time_ms": 0.723, "default": {"kl": 0.018367202952504158, "cur_lr": 4.999999873689376e-05, "entropy": 18.43258285522461, "total_loss": 9163.8193359375, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1256006956100464, "vf_explained_var": 0.001056631444953382, "vf_loss": 9163.92578125}, "grad_time_ms": 689.564}, "pid": 3934253, "time_total_s": 1635.195505142212, "episode_reward_mean": -227.8824827519844, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -247.858779289968, "policy_reward_mean": {}, "episodes_total": 288, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -188.91056735653865, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_15-03-45", "training_iteration": 12, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756472625, "episode_len_mean": 50.0, "timesteps_since_restore": 14400, "time_since_restore": 1635.195505142212, "time_this_iter_s": 116.08487486839294, "iterations_since_restore": 12}
+{"timesteps_total": 15600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 122994.689, "num_steps_sampled": 15600, "update_time_ms": 2.27, "num_steps_trained": 15600, "load_time_ms": 0.718, "default": {"kl": 0.018263446167111397, "cur_lr": 4.999999873689376e-05, "entropy": 18.406959533691406, "total_loss": 8039.34375, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13872545957565308, "vf_explained_var": 0.009270284324884415, "vf_loss": 8039.46337890625}, "grad_time_ms": 711.946}, "pid": 3934253, "time_total_s": 1724.5654287338257, "episode_reward_mean": -226.95394541182313, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -246.89273563832404, "policy_reward_mean": {}, "episodes_total": 312, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -188.91056735653865, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_15-05-15", "training_iteration": 13, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756472715, "episode_len_mean": 50.0, "timesteps_since_restore": 15600, "time_since_restore": 1724.5654287338257, "time_this_iter_s": 89.36992359161377, "iterations_since_restore": 13}
+{"timesteps_total": 16800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 120749.929, "num_steps_sampled": 16800, "update_time_ms": 2.298, "num_steps_trained": 16800, "load_time_ms": 0.694, "default": {"kl": 0.01851937174797058, "cur_lr": 4.999999873689376e-05, "entropy": 18.40781593322754, "total_loss": 7785.65380859375, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1454668492078781, "vf_explained_var": 0.0034669903106987476, "vf_loss": 7785.7802734375}, "grad_time_ms": 718.79}, "pid": 3934253, "time_total_s": 1833.362226486206, "episode_reward_mean": -226.5395327474367, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -246.89273563832404, "policy_reward_mean": {}, "episodes_total": 336, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -188.91056735653865, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_15-07-03", "training_iteration": 14, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756472823, "episode_len_mean": 50.0, "timesteps_since_restore": 16800, "time_since_restore": 1833.362226486206, "time_this_iter_s": 108.79679775238037, "iterations_since_restore": 14}
+{"timesteps_total": 18000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 120444.62, "num_steps_sampled": 18000, "update_time_ms": 2.292, "num_steps_trained": 18000, "load_time_ms": 0.695, "default": {"kl": 0.018088672310113907, "cur_lr": 4.999999873689376e-05, "entropy": 18.373947143554688, "total_loss": 7424.033203125, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13359440863132477, "vf_explained_var": -2.5298859327449463e-06, "vf_loss": 7424.1494140625}, "grad_time_ms": 707.939}, "pid": 3934253, "time_total_s": 1963.7715697288513, "episode_reward_mean": -226.28930702200313, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -246.89273563832404, "policy_reward_mean": {}, "episodes_total": 360, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -192.68354188559, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_15-09-14", "training_iteration": 15, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756472954, "episode_len_mean": 50.0, "timesteps_since_restore": 18000, "time_since_restore": 1963.7715697288513, "time_this_iter_s": 130.40934324264526, "iterations_since_restore": 15}
+{"timesteps_total": 19200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 115962.542, "num_steps_sampled": 19200, "update_time_ms": 2.277, "num_steps_trained": 19200, "load_time_ms": 0.705, "default": {"kl": 0.018010612577199936, "cur_lr": 4.999999873689376e-05, "entropy": 18.32969856262207, "total_loss": 7933.4677734375, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13802139461040497, "vf_explained_var": -0.001161250751465559, "vf_loss": 7933.5869140625}, "grad_time_ms": 717.734}, "pid": 3934253, "time_total_s": 2053.281415939331, "episode_reward_mean": -225.430023675914, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -247.85240578397764, "policy_reward_mean": {}, "episodes_total": 384, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -192.68354188559, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_15-10-43", "training_iteration": 16, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756473043, "episode_len_mean": 50.0, "timesteps_since_restore": 19200, "time_since_restore": 2053.281415939331, "time_this_iter_s": 89.50984621047974, "iterations_since_restore": 16}
+{"timesteps_total": 20400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 115180.178, "num_steps_sampled": 20400, "update_time_ms": 2.306, "num_steps_trained": 20400, "load_time_ms": 0.674, "default": {"kl": 0.01733492501080036, "cur_lr": 4.999999873689376e-05, "entropy": 18.352266311645508, "total_loss": 6935.912109375, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13916505873203278, "vf_explained_var": 0.0398666188120842, "vf_loss": 6936.03369140625}, "grad_time_ms": 715.961}, "pid": 3934253, "time_total_s": 2173.555982351303, "episode_reward_mean": -224.31972516845806, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -247.85240578397764, "policy_reward_mean": {}, "episodes_total": 408, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -192.56745469224097, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_15-12-44", "training_iteration": 17, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756473164, "episode_len_mean": 50.0, "timesteps_since_restore": 20400, "time_since_restore": 2173.555982351303, "time_this_iter_s": 120.27456641197205, "iterations_since_restore": 17}
+{"timesteps_total": 21600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 115446.416, "num_steps_sampled": 21600, "update_time_ms": 2.353, "num_steps_trained": 21600, "load_time_ms": 0.645, "default": {"kl": 0.018643349409103394, "cur_lr": 4.999999873689376e-05, "entropy": 18.32407569885254, "total_loss": 7109.57861328125, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14262330532073975, "vf_explained_var": 0.058559127151966095, "vf_loss": 7109.7021484375}, "grad_time_ms": 716.764}, "pid": 3934253, "time_total_s": 2299.1294887065887, "episode_reward_mean": -224.86252533298918, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -248.00111036780248, "policy_reward_mean": {}, "episodes_total": 432, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -192.56745469224097, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_15-14-49", "training_iteration": 18, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756473289, "episode_len_mean": 50.0, "timesteps_since_restore": 21600, "time_since_restore": 2299.1294887065887, "time_this_iter_s": 125.57350635528564, "iterations_since_restore": 18}
+{"timesteps_total": 22800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 111634.23, "num_steps_sampled": 22800, "update_time_ms": 2.343, "num_steps_trained": 22800, "load_time_ms": 0.647, "default": {"kl": 0.017198346555233, "cur_lr": 4.999999873689376e-05, "entropy": 18.320524215698242, "total_loss": 6918.37060546875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13064756989479065, "vf_explained_var": 0.07814642041921616, "vf_loss": 6918.48388671875}, "grad_time_ms": 711.846}, "pid": 3934253, "time_total_s": 2394.942296743393, "episode_reward_mean": -225.21955188410809, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -248.00111036780248, "policy_reward_mean": {}, "episodes_total": 456, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -192.56745469224097, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_15-16-25", "training_iteration": 19, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756473385, "episode_len_mean": 50.0, "timesteps_since_restore": 22800, "time_since_restore": 2394.942296743393, "time_this_iter_s": 95.8128080368042, "iterations_since_restore": 19}
+{"timesteps_total": 24000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 111642.021, "num_steps_sampled": 24000, "update_time_ms": 2.345, "num_steps_trained": 24000, "load_time_ms": 0.617, "default": {"kl": 0.018709510564804077, "cur_lr": 4.999999873689376e-05, "entropy": 18.2652645111084, "total_loss": 6797.3310546875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14481525123119354, "vf_explained_var": 0.09185083210468292, "vf_loss": 6797.45703125}, "grad_time_ms": 720.998}, "pid": 3934253, "time_total_s": 2523.54922413826, "episode_reward_mean": -224.69537291467503, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -248.00111036780248, "policy_reward_mean": {}, "episodes_total": 480, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -190.85068285650394, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_15-18-34", "training_iteration": 20, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756473514, "episode_len_mean": 50.0, "timesteps_since_restore": 24000, "time_since_restore": 2523.54922413826, "time_this_iter_s": 128.60692739486694, "iterations_since_restore": 20}
+{"timesteps_total": 25200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 111823.423, "num_steps_sampled": 25200, "update_time_ms": 2.43, "num_steps_trained": 25200, "load_time_ms": 0.654, "default": {"kl": 0.016120517626404762, "cur_lr": 4.999999873689376e-05, "entropy": 18.257076263427734, "total_loss": 6505.86279296875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14086978137493134, "vf_explained_var": 0.0601482056081295, "vf_loss": 6505.98779296875}, "grad_time_ms": 743.136}, "pid": 3934253, "time_total_s": 2644.859076499939, "episode_reward_mean": -225.42085905668347, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -248.00111036780248, "policy_reward_mean": {}, "episodes_total": 504, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -190.85068285650394, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_15-20-35", "training_iteration": 21, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756473635, "episode_len_mean": 50.0, "timesteps_since_restore": 25200, "time_since_restore": 2644.859076499939, "time_this_iter_s": 121.30985236167908, "iterations_since_restore": 21}
+{"timesteps_total": 26400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 108554.114, "num_steps_sampled": 26400, "update_time_ms": 2.528, "num_steps_trained": 26400, "load_time_ms": 0.653, "default": {"kl": 0.01815476268529892, "cur_lr": 4.999999873689376e-05, "entropy": 18.25238037109375, "total_loss": 6361.66943359375, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1422284096479416, "vf_explained_var": 0.0871841087937355, "vf_loss": 6361.79345703125}, "grad_time_ms": 733.411}, "pid": 3934253, "time_total_s": 2728.1552817821503, "episode_reward_mean": -227.12070903133855, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -247.372242841637, "policy_reward_mean": {}, "episodes_total": 528, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -190.85068285650394, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_15-21-58", "training_iteration": 22, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756473718, "episode_len_mean": 50.0, "timesteps_since_restore": 26400, "time_since_restore": 2728.1552817821503, "time_this_iter_s": 83.2962052822113, "iterations_since_restore": 22}
+{"timesteps_total": 27600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 113536.284, "num_steps_sampled": 27600, "update_time_ms": 2.523, "num_steps_trained": 27600, "load_time_ms": 0.646, "default": {"kl": 0.01872488297522068, "cur_lr": 4.999999873689376e-05, "entropy": 18.202598571777344, "total_loss": 5540.90380859375, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14064835011959076, "vf_explained_var": 0.09507162123918533, "vf_loss": 5541.025390625}, "grad_time_ms": 721.676}, "pid": 3934253, "time_total_s": 2867.229010820389, "episode_reward_mean": -224.52522155211645, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -247.372242841637, "policy_reward_mean": {}, "episodes_total": 552, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -188.93533640553093, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_15-24-17", "training_iteration": 23, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756473857, "episode_len_mean": 50.0, "timesteps_since_restore": 27600, "time_since_restore": 2867.229010820389, "time_this_iter_s": 139.07372903823853, "iterations_since_restore": 23}
+{"timesteps_total": 28800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 111666.508, "num_steps_sampled": 28800, "update_time_ms": 2.53, "num_steps_trained": 28800, "load_time_ms": 0.645, "default": {"kl": 0.017991013824939728, "cur_lr": 4.999999873689376e-05, "entropy": 18.139867782592773, "total_loss": 5488.4775390625, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.15115779638290405, "vf_explained_var": 0.122310571372509, "vf_loss": 5488.6103515625}, "grad_time_ms": 706.538}, "pid": 3934253, "time_total_s": 2957.178115129471, "episode_reward_mean": -222.86015856207715, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -248.2345499737599, "policy_reward_mean": {}, "episodes_total": 576, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -188.93533640553093, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_15-25-47", "training_iteration": 24, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756473947, "episode_len_mean": 50.0, "timesteps_since_restore": 28800, "time_since_restore": 2957.178115129471, "time_this_iter_s": 89.94910430908203, "iterations_since_restore": 24}
+{"timesteps_total": 30000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 110789.341, "num_steps_sampled": 30000, "update_time_ms": 2.507, "num_steps_trained": 30000, "load_time_ms": 0.649, "default": {"kl": 0.018119478598237038, "cur_lr": 4.999999873689376e-05, "entropy": 18.104427337646484, "total_loss": 5291.33203125, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1454295516014099, "vf_explained_var": 0.1271432340145111, "vf_loss": 5291.458984375}, "grad_time_ms": 689.769}, "pid": 3934253, "time_total_s": 3078.6474380493164, "episode_reward_mean": -220.54939918657251, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -248.2345499737599, "policy_reward_mean": {}, "episodes_total": 600, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -185.60920330149142, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_15-27-49", "training_iteration": 25, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756474069, "episode_len_mean": 50.0, "timesteps_since_restore": 30000, "time_since_restore": 3078.6474380493164, "time_this_iter_s": 121.46932291984558, "iterations_since_restore": 25}
+{"timesteps_total": 31200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 114266.039, "num_steps_sampled": 31200, "update_time_ms": 2.532, "num_steps_trained": 31200, "load_time_ms": 0.644, "default": {"kl": 0.018067501485347748, "cur_lr": 4.999999873689376e-05, "entropy": 18.099180221557617, "total_loss": 4856.0693359375, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1323235034942627, "vf_explained_var": 0.12717147171497345, "vf_loss": 4856.18310546875}, "grad_time_ms": 666.322}, "pid": 3934253, "time_total_s": 3202.6897122859955, "episode_reward_mean": -217.74776505287662, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -248.2345499737599, "policy_reward_mean": {}, "episodes_total": 624, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -185.60920330149142, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_15-29-53", "training_iteration": 26, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756474193, "episode_len_mean": 50.0, "timesteps_since_restore": 31200, "time_since_restore": 3202.6897122859955, "time_this_iter_s": 124.04227423667908, "iterations_since_restore": 26}
+{"timesteps_total": 32400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 114420.868, "num_steps_sampled": 32400, "update_time_ms": 2.546, "num_steps_trained": 32400, "load_time_ms": 0.642, "default": {"kl": 0.01893593929708004, "cur_lr": 4.999999873689376e-05, "entropy": 18.082481384277344, "total_loss": 4736.587890625, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14160507917404175, "vf_explained_var": 0.1733734905719757, "vf_loss": 4736.7099609375}, "grad_time_ms": 654.305}, "pid": 3934253, "time_total_s": 3324.3915185928345, "episode_reward_mean": -216.32067322708596, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -248.2345499737599, "policy_reward_mean": {}, "episodes_total": 648, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -185.60920330149142, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_15-31-55", "training_iteration": 27, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756474315, "episode_len_mean": 50.0, "timesteps_since_restore": 32400, "time_since_restore": 3324.3915185928345, "time_this_iter_s": 121.70180630683899, "iterations_since_restore": 27}
+{"timesteps_total": 33600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 113001.238, "num_steps_sampled": 33600, "update_time_ms": 2.484, "num_steps_trained": 33600, "load_time_ms": 0.644, "default": {"kl": 0.018984422087669373, "cur_lr": 4.999999873689376e-05, "entropy": 18.03122329711914, "total_loss": 4447.91552734375, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.15685193240642548, "vf_explained_var": 0.14473694562911987, "vf_loss": 4448.052734375}, "grad_time_ms": 647.223}, "pid": 3934253, "time_total_s": 3435.6978681087494, "episode_reward_mean": -213.80917812804458, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -248.83030415581862, "policy_reward_mean": {}, "episodes_total": 672, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -185.60920330149142, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_15-33-46", "training_iteration": 28, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756474426, "episode_len_mean": 50.0, "timesteps_since_restore": 33600, "time_since_restore": 3435.6978681087494, "time_this_iter_s": 111.30634951591492, "iterations_since_restore": 28}
+{"timesteps_total": 34800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 116696.335, "num_steps_sampled": 34800, "update_time_ms": 2.497, "num_steps_trained": 34800, "load_time_ms": 0.646, "default": {"kl": 0.01779862865805626, "cur_lr": 4.999999873689376e-05, "entropy": 18.02900505065918, "total_loss": 4699.4326171875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13835091888904572, "vf_explained_var": 0.21267952024936676, "vf_loss": 4699.552734375}, "grad_time_ms": 639.428}, "pid": 3934253, "time_total_s": 3568.38410115242, "episode_reward_mean": -214.57597202649774, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -248.83030415581862, "policy_reward_mean": {}, "episodes_total": 696, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -191.44960194830855, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_15-35-59", "training_iteration": 29, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756474559, "episode_len_mean": 50.0, "timesteps_since_restore": 34800, "time_since_restore": 3568.38410115242, "time_this_iter_s": 132.68623304367065, "iterations_since_restore": 29}
+{"timesteps_total": 36000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 117910.592, "num_steps_sampled": 36000, "update_time_ms": 2.546, "num_steps_trained": 36000, "load_time_ms": 0.647, "default": {"kl": 0.0186467245221138, "cur_lr": 4.999999873689376e-05, "entropy": 18.017282485961914, "total_loss": 4788.78857421875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14452102780342102, "vf_explained_var": 0.1922873705625534, "vf_loss": 4788.9140625}, "grad_time_ms": 635.195}, "pid": 3934253, "time_total_s": 3709.0914623737335, "episode_reward_mean": -216.39424869499814, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -248.83030415581862, "policy_reward_mean": {}, "episodes_total": 720, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -189.41091576437802, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_15-38-19", "training_iteration": 30, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756474699, "episode_len_mean": 50.0, "timesteps_since_restore": 36000, "time_since_restore": 3709.0914623737335, "time_this_iter_s": 140.70736122131348, "iterations_since_restore": 30}
+{"timesteps_total": 37200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 121207.566, "num_steps_sampled": 37200, "update_time_ms": 2.549, "num_steps_trained": 37200, "load_time_ms": 0.611, "default": {"kl": 0.017331527546048164, "cur_lr": 4.999999873689376e-05, "entropy": 17.992008209228516, "total_loss": 4373.1201171875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13839353621006012, "vf_explained_var": 0.04371914640069008, "vf_loss": 4373.24072265625}, "grad_time_ms": 637.987}, "pid": 3934253, "time_total_s": 3863.398061275482, "episode_reward_mean": -215.19791028193805, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -248.83030415581862, "policy_reward_mean": {}, "episodes_total": 744, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -186.86999539137864, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_15-40-54", "training_iteration": 31, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756474854, "episode_len_mean": 50.0, "timesteps_since_restore": 37200, "time_since_restore": 3863.398061275482, "time_this_iter_s": 154.30659890174866, "iterations_since_restore": 31}
+{"timesteps_total": 38400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 124453.716, "num_steps_sampled": 38400, "update_time_ms": 2.442, "num_steps_trained": 38400, "load_time_ms": 0.613, "default": {"kl": 0.017102720215916634, "cur_lr": 4.999999873689376e-05, "entropy": 17.92547607421875, "total_loss": 3819.66357421875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13893364369869232, "vf_explained_var": 0.1954089254140854, "vf_loss": 3819.78515625}, "grad_time_ms": 644.967}, "pid": 3934253, "time_total_s": 3979.224608182907, "episode_reward_mean": -215.13311737404922, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -246.02100713653664, "policy_reward_mean": {}, "episodes_total": 768, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -186.86999539137864, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_15-42-50", "training_iteration": 32, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756474970, "episode_len_mean": 50.0, "timesteps_since_restore": 38400, "time_since_restore": 3979.224608182907, "time_this_iter_s": 115.82654690742493, "iterations_since_restore": 32}
+{"timesteps_total": 39600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 123454.297, "num_steps_sampled": 39600, "update_time_ms": 2.494, "num_steps_trained": 39600, "load_time_ms": 0.612, "default": {"kl": 0.017682187259197235, "cur_lr": 4.999999873689376e-05, "entropy": 17.938262939453125, "total_loss": 3737.1103515625, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14358346164226532, "vf_explained_var": 0.1921062171459198, "vf_loss": 3737.236083984375}, "grad_time_ms": 659.685}, "pid": 3934253, "time_total_s": 4108.452016592026, "episode_reward_mean": -213.94500279124793, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -246.02100713653664, "policy_reward_mean": {}, "episodes_total": 792, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -186.86999539137864, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_15-44-59", "training_iteration": 33, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756475099, "episode_len_mean": 50.0, "timesteps_since_restore": 39600, "time_since_restore": 4108.452016592026, "time_this_iter_s": 129.22740840911865, "iterations_since_restore": 33}
+{"timesteps_total": 40800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 128326.13, "num_steps_sampled": 40800, "update_time_ms": 2.44, "num_steps_trained": 40800, "load_time_ms": 0.608, "default": {"kl": 0.017134059220552444, "cur_lr": 4.999999873689376e-05, "entropy": 17.834041595458984, "total_loss": 3461.917724609375, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12815701961517334, "vf_explained_var": 0.15495187044143677, "vf_loss": 3462.0283203125}, "grad_time_ms": 675.352}, "pid": 3934253, "time_total_s": 4247.277045726776, "episode_reward_mean": -211.12190019537888, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -246.02100713653664, "policy_reward_mean": {}, "episodes_total": 816, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -186.86999539137864, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_15-47-18", "training_iteration": 34, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756475238, "episode_len_mean": 50.0, "timesteps_since_restore": 40800, "time_since_restore": 4247.277045726776, "time_this_iter_s": 138.82502913475037, "iterations_since_restore": 34}
+{"timesteps_total": 42000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 125027.49, "num_steps_sampled": 42000, "update_time_ms": 2.422, "num_steps_trained": 42000, "load_time_ms": 0.614, "default": {"kl": 0.018565503880381584, "cur_lr": 4.999999873689376e-05, "entropy": 17.86197280883789, "total_loss": 4044.3408203125, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14776770770549774, "vf_explained_var": 0.1975460797548294, "vf_loss": 4044.4697265625}, "grad_time_ms": 709.139}, "pid": 3934253, "time_total_s": 4336.0987548828125, "episode_reward_mean": -211.779697417606, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -244.79919426101299, "policy_reward_mean": {}, "episodes_total": 840, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -185.87812229652314, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_15-48-46", "training_iteration": 35, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756475326, "episode_len_mean": 50.0, "timesteps_since_restore": 42000, "time_since_restore": 4336.0987548828125, "time_this_iter_s": 88.82170915603638, "iterations_since_restore": 35}
+{"timesteps_total": 43200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 124788.062, "num_steps_sampled": 43200, "update_time_ms": 2.438, "num_steps_trained": 43200, "load_time_ms": 0.615, "default": {"kl": 0.017858348786830902, "cur_lr": 4.999999873689376e-05, "entropy": 17.815601348876953, "total_loss": 3304.437255859375, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14674967527389526, "vf_explained_var": 0.30027350783348083, "vf_loss": 3304.56591796875}, "grad_time_ms": 723.817}, "pid": 3934253, "time_total_s": 4457.893758058548, "episode_reward_mean": -212.04843016952287, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -245.58104877489959, "policy_reward_mean": {}, "episodes_total": 864, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -185.87812229652314, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_15-50-48", "training_iteration": 36, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756475448, "episode_len_mean": 50.0, "timesteps_since_restore": 43200, "time_since_restore": 4457.893758058548, "time_this_iter_s": 121.79500317573547, "iterations_since_restore": 36}
+{"timesteps_total": 44400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 124188.457, "num_steps_sampled": 44400, "update_time_ms": 2.4, "num_steps_trained": 44400, "load_time_ms": 0.611, "default": {"kl": 0.01844792626798153, "cur_lr": 4.999999873689376e-05, "entropy": 17.81826400756836, "total_loss": 3297.3232421875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1480633020401001, "vf_explained_var": 0.19995717704296112, "vf_loss": 3297.452392578125}, "grad_time_ms": 735.92}, "pid": 3934253, "time_total_s": 4573.720880746841, "episode_reward_mean": -211.19359964775035, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -245.58104877489959, "policy_reward_mean": {}, "episodes_total": 888, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -185.87812229652314, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_15-52-44", "training_iteration": 37, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756475564, "episode_len_mean": 50.0, "timesteps_since_restore": 44400, "time_since_restore": 4573.720880746841, "time_this_iter_s": 115.82712268829346, "iterations_since_restore": 37}
+{"timesteps_total": 45600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 126123.452, "num_steps_sampled": 45600, "update_time_ms": 2.4, "num_steps_trained": 45600, "load_time_ms": 0.649, "default": {"kl": 0.018443183973431587, "cur_lr": 4.999999873689376e-05, "entropy": 17.755903244018555, "total_loss": 3353.221435546875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.15454323589801788, "vf_explained_var": 0.2896014153957367, "vf_loss": 3353.357177734375}, "grad_time_ms": 738.131}, "pid": 3934253, "time_total_s": 4704.400423049927, "episode_reward_mean": -213.00286027217822, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -247.5537867115574, "policy_reward_mean": {}, "episodes_total": 912, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -185.87812229652314, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_15-54-55", "training_iteration": 38, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756475695, "episode_len_mean": 50.0, "timesteps_since_restore": 45600, "time_since_restore": 4704.400423049927, "time_this_iter_s": 130.67954230308533, "iterations_since_restore": 38}
+{"timesteps_total": 46800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 121236.767, "num_steps_sampled": 46800, "update_time_ms": 2.451, "num_steps_trained": 46800, "load_time_ms": 0.646, "default": {"kl": 0.018317891284823418, "cur_lr": 4.999999873689376e-05, "entropy": 17.811492919921875, "total_loss": 3417.5546875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1502545177936554, "vf_explained_var": 0.2569473087787628, "vf_loss": 3417.686279296875}, "grad_time_ms": 758.619}, "pid": 3934253, "time_total_s": 4788.425406217575, "episode_reward_mean": -212.3051676911543, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -247.5537867115574, "policy_reward_mean": {}, "episodes_total": 936, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -181.43706975607378, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_15-56-19", "training_iteration": 39, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756475779, "episode_len_mean": 50.0, "timesteps_since_restore": 46800, "time_since_restore": 4788.425406217575, "time_this_iter_s": 84.02498316764832, "iterations_since_restore": 39}
+{"timesteps_total": 48000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 119440.389, "num_steps_sampled": 48000, "update_time_ms": 2.464, "num_steps_trained": 48000, "load_time_ms": 0.648, "default": {"kl": 0.01919081062078476, "cur_lr": 4.999999873689376e-05, "entropy": 17.807842254638672, "total_loss": 3161.85986328125, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.15894289314746857, "vf_explained_var": 0.35566556453704834, "vf_loss": 3161.999267578125}, "grad_time_ms": 738.052}, "pid": 3934253, "time_total_s": 4910.962848186493, "episode_reward_mean": -213.84192706556107, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -247.5537867115574, "policy_reward_mean": {}, "episodes_total": 960, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -181.43706975607378, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_15-58-21", "training_iteration": 40, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756475901, "episode_len_mean": 50.0, "timesteps_since_restore": 48000, "time_since_restore": 4910.962848186493, "time_this_iter_s": 122.53744196891785, "iterations_since_restore": 40}
+{"timesteps_total": 49200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 115781.532, "num_steps_sampled": 49200, "update_time_ms": 2.471, "num_steps_trained": 49200, "load_time_ms": 0.65, "default": {"kl": 0.018592309206724167, "cur_lr": 4.999999873689376e-05, "entropy": 17.775829315185547, "total_loss": 3190.97412109375, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1547202467918396, "vf_explained_var": 0.2281986027956009, "vf_loss": 3191.1103515625}, "grad_time_ms": 717.619}, "pid": 3934253, "time_total_s": 5028.476491689682, "episode_reward_mean": -213.4334468931419, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -247.5537867115574, "policy_reward_mean": {}, "episodes_total": 984, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -175.2855057359052, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_16-00-19", "training_iteration": 41, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756476019, "episode_len_mean": 50.0, "timesteps_since_restore": 49200, "time_since_restore": 5028.476491689682, "time_this_iter_s": 117.51364350318909, "iterations_since_restore": 41}
+{"timesteps_total": 50400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 116700.271, "num_steps_sampled": 50400, "update_time_ms": 2.521, "num_steps_trained": 50400, "load_time_ms": 0.649, "default": {"kl": 0.017873523756861687, "cur_lr": 4.999999873689376e-05, "entropy": 17.768165588378906, "total_loss": 2994.9541015625, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1501152515411377, "vf_explained_var": 0.32763707637786865, "vf_loss": 2995.0859375}, "grad_time_ms": 708.999}, "pid": 3934253, "time_total_s": 5153.405420064926, "episode_reward_mean": -215.7174857830358, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -246.37354808212874, "policy_reward_mean": {}, "episodes_total": 1008, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -175.2855057359052, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_16-02-24", "training_iteration": 42, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756476144, "episode_len_mean": 50.0, "timesteps_since_restore": 50400, "time_since_restore": 5153.405420064926, "time_this_iter_s": 124.92892837524414, "iterations_since_restore": 42}
+{"timesteps_total": 51600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 113745.523, "num_steps_sampled": 51600, "update_time_ms": 2.494, "num_steps_trained": 51600, "load_time_ms": 0.652, "default": {"kl": 0.018063882365822792, "cur_lr": 4.999999873689376e-05, "entropy": 17.706390380859375, "total_loss": 2835.5146484375, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1458158642053604, "vf_explained_var": 0.32134178280830383, "vf_loss": 2835.642333984375}, "grad_time_ms": 712.86}, "pid": 3934253, "time_total_s": 5253.124094724655, "episode_reward_mean": -214.39119330004388, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -246.37354808212874, "policy_reward_mean": {}, "episodes_total": 1032, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -175.05672191815188, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_16-04-04", "training_iteration": 43, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756476244, "episode_len_mean": 50.0, "timesteps_since_restore": 51600, "time_since_restore": 5253.124094724655, "time_this_iter_s": 99.718674659729, "iterations_since_restore": 43}
+{"timesteps_total": 52800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 112197.151, "num_steps_sampled": 52800, "update_time_ms": 2.539, "num_steps_trained": 52800, "load_time_ms": 0.693, "default": {"kl": 0.018111437559127808, "cur_lr": 4.999999873689376e-05, "entropy": 17.677021026611328, "total_loss": 3190.59130859375, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14791905879974365, "vf_explained_var": 0.15911920368671417, "vf_loss": 3190.720947265625}, "grad_time_ms": 710.067}, "pid": 3934253, "time_total_s": 5376.435137987137, "episode_reward_mean": -213.18503772057986, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -246.37354808212874, "policy_reward_mean": {}, "episodes_total": 1056, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -175.05672191815188, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_16-06-07", "training_iteration": 44, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756476367, "episode_len_mean": 50.0, "timesteps_since_restore": 52800, "time_since_restore": 5376.435137987137, "time_this_iter_s": 123.31104326248169, "iterations_since_restore": 44}
+{"timesteps_total": 54000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 116778.42, "num_steps_sampled": 54000, "update_time_ms": 2.573, "num_steps_trained": 54000, "load_time_ms": 0.695, "default": {"kl": 0.01823728159070015, "cur_lr": 4.999999873689376e-05, "entropy": 17.698951721191406, "total_loss": 2804.24169921875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.15112829208374023, "vf_explained_var": 0.2769123613834381, "vf_loss": 2804.37451171875}, "grad_time_ms": 704.293}, "pid": 3934253, "time_total_s": 5511.011833429337, "episode_reward_mean": -213.08326170254938, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -245.10308690094269, "policy_reward_mean": {}, "episodes_total": 1080, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -175.05672191815188, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_16-08-22", "training_iteration": 45, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756476502, "episode_len_mean": 50.0, "timesteps_since_restore": 54000, "time_since_restore": 5511.011833429337, "time_this_iter_s": 134.5766954421997, "iterations_since_restore": 45}
+{"timesteps_total": 55200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 118468.282, "num_steps_sampled": 55200, "update_time_ms": 2.579, "num_steps_trained": 55200, "load_time_ms": 0.722, "default": {"kl": 0.017772618681192398, "cur_lr": 4.999999873689376e-05, "entropy": 17.67725372314453, "total_loss": 2877.27392578125, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1525329202413559, "vf_explained_var": 0.30773845314979553, "vf_loss": 2877.40869140625}, "grad_time_ms": 706.245}, "pid": 3934253, "time_total_s": 5649.724349737167, "episode_reward_mean": -211.9924811523262, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -244.81933204732172, "policy_reward_mean": {}, "episodes_total": 1104, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -175.05672191815188, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_16-10-40", "training_iteration": 46, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756476640, "episode_len_mean": 50.0, "timesteps_since_restore": 55200, "time_since_restore": 5649.724349737167, "time_this_iter_s": 138.7125163078308, "iterations_since_restore": 46}
+{"timesteps_total": 56400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 119917.311, "num_steps_sampled": 56400, "update_time_ms": 2.571, "num_steps_trained": 56400, "load_time_ms": 0.729, "default": {"kl": 0.017335502430796623, "cur_lr": 4.999999873689376e-05, "entropy": 17.574216842651367, "total_loss": 2686.016845703125, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.15624472498893738, "vf_explained_var": 0.31639328598976135, "vf_loss": 2686.155517578125}, "grad_time_ms": 704.712}, "pid": 3934253, "time_total_s": 5780.025140762329, "episode_reward_mean": -212.06696124329548, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -244.81933204732172, "policy_reward_mean": {}, "episodes_total": 1128, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -176.47354464694985, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_16-12-51", "training_iteration": 47, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756476771, "episode_len_mean": 50.0, "timesteps_since_restore": 56400, "time_since_restore": 5780.025140762329, "time_this_iter_s": 130.30079102516174, "iterations_since_restore": 47}
+{"timesteps_total": 57600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 120133.195, "num_steps_sampled": 57600, "update_time_ms": 2.578, "num_steps_trained": 57600, "load_time_ms": 0.692, "default": {"kl": 0.018136359751224518, "cur_lr": 4.999999873689376e-05, "entropy": 17.61043930053711, "total_loss": 2544.2529296875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.15641115605831146, "vf_explained_var": 0.32302284240722656, "vf_loss": 2544.39111328125}, "grad_time_ms": 705.707}, "pid": 3934253, "time_total_s": 5912.872404336929, "episode_reward_mean": -211.27840171927173, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -242.75502909465445, "policy_reward_mean": {}, "episodes_total": 1152, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -180.22750393736035, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_16-15-03", "training_iteration": 48, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756476903, "episode_len_mean": 50.0, "timesteps_since_restore": 57600, "time_since_restore": 5912.872404336929, "time_this_iter_s": 132.84726357460022, "iterations_since_restore": 48}
+{"timesteps_total": 58800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 126388.469, "num_steps_sampled": 58800, "update_time_ms": 2.5, "num_steps_trained": 58800, "load_time_ms": 0.692, "default": {"kl": 0.017506470903754234, "cur_lr": 4.999999873689376e-05, "entropy": 17.62823486328125, "total_loss": 3480.99951171875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.15134403109550476, "vf_explained_var": 0.1799653172492981, "vf_loss": 3481.13330078125}, "grad_time_ms": 699.242}, "pid": 3934253, "time_total_s": 6059.384567737579, "episode_reward_mean": -211.70786122380647, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -243.36017384063356, "policy_reward_mean": {}, "episodes_total": 1176, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -165.89434605077207, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_16-17-30", "training_iteration": 49, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756477050, "episode_len_mean": 50.0, "timesteps_since_restore": 58800, "time_since_restore": 6059.384567737579, "time_this_iter_s": 146.51216340065002, "iterations_since_restore": 49}
+{"timesteps_total": 60000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 127471.196, "num_steps_sampled": 60000, "update_time_ms": 2.503, "num_steps_trained": 60000, "load_time_ms": 0.688, "default": {"kl": 0.0181845985352993, "cur_lr": 4.999999873689376e-05, "entropy": 17.62104606628418, "total_loss": 3795.71875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14132875204086304, "vf_explained_var": -0.15382134914398193, "vf_loss": 3795.842041015625}, "grad_time_ms": 717.066}, "pid": 3934253, "time_total_s": 6192.928519487381, "episode_reward_mean": -210.11993828827156, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -243.36017384063356, "policy_reward_mean": {}, "episodes_total": 1200, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -165.89434605077207, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_16-19-44", "training_iteration": 50, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756477184, "episode_len_mean": 50.0, "timesteps_since_restore": 60000, "time_since_restore": 6192.928519487381, "time_this_iter_s": 133.54395174980164, "iterations_since_restore": 50}
+{"timesteps_total": 61200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 127189.731, "num_steps_sampled": 61200, "update_time_ms": 2.526, "num_steps_trained": 61200, "load_time_ms": 0.684, "default": {"kl": 0.018260452896356583, "cur_lr": 4.999999873689376e-05, "entropy": 17.566057205200195, "total_loss": 3529.8896484375, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1244841143488884, "vf_explained_var": -0.031975701451301575, "vf_loss": 3529.995361328125}, "grad_time_ms": 746.291}, "pid": 3934253, "time_total_s": 6307.920372962952, "episode_reward_mean": -208.46888390923715, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -243.36017384063356, "policy_reward_mean": {}, "episodes_total": 1224, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -165.89434605077207, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_16-21-39", "training_iteration": 51, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756477299, "episode_len_mean": 50.0, "timesteps_since_restore": 61200, "time_since_restore": 6307.920372962952, "time_this_iter_s": 114.99185347557068, "iterations_since_restore": 51}
+{"timesteps_total": 62400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 126173.789, "num_steps_sampled": 62400, "update_time_ms": 2.55, "num_steps_trained": 62400, "load_time_ms": 0.69, "default": {"kl": 0.018367886543273926, "cur_lr": 4.999999873689376e-05, "entropy": 17.582782745361328, "total_loss": 2911.344482421875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.15388014912605286, "vf_explained_var": 0.11462072283029556, "vf_loss": 2911.47998046875}, "grad_time_ms": 762.265}, "pid": 3934253, "time_total_s": 6422.849180936813, "episode_reward_mean": -207.44610162930013, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -243.36017384063356, "policy_reward_mean": {}, "episodes_total": 1248, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -165.89434605077207, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_16-23-34", "training_iteration": 52, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756477414, "episode_len_mean": 50.0, "timesteps_since_restore": 62400, "time_since_restore": 6422.849180936813, "time_this_iter_s": 114.9288079738617, "iterations_since_restore": 52}
+{"timesteps_total": 63600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 127713.001, "num_steps_sampled": 63600, "update_time_ms": 2.559, "num_steps_trained": 63600, "load_time_ms": 0.695, "default": {"kl": 0.0152328722178936, "cur_lr": 4.999999873689376e-05, "entropy": 17.559120178222656, "total_loss": 2936.9033203125, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12449096143245697, "vf_explained_var": 0.13940726220607758, "vf_loss": 2937.01220703125}, "grad_time_ms": 754.196}, "pid": 3934253, "time_total_s": 6537.878677845001, "episode_reward_mean": -205.3037430007791, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -243.33831834713772, "policy_reward_mean": {}, "episodes_total": 1272, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -165.89434605077207, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_16-25-29", "training_iteration": 53, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756477529, "episode_len_mean": 50.0, "timesteps_since_restore": 63600, "time_since_restore": 6537.878677845001, "time_this_iter_s": 115.02949690818787, "iterations_since_restore": 53}
+{"timesteps_total": 64800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 128614.03, "num_steps_sampled": 64800, "update_time_ms": 2.534, "num_steps_trained": 64800, "load_time_ms": 0.655, "default": {"kl": 0.015593416057527065, "cur_lr": 4.999999873689376e-05, "entropy": 17.57250213623047, "total_loss": 3269.6923828125, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12995809316635132, "vf_explained_var": 0.041274651885032654, "vf_loss": 3269.806640625}, "grad_time_ms": 745.365}, "pid": 3934253, "time_total_s": 6670.112357854843, "episode_reward_mean": -205.87594885722905, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -243.33831834713772, "policy_reward_mean": {}, "episodes_total": 1296, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -171.90663959860424, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_16-27-41", "training_iteration": 54, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756477661, "episode_len_mean": 50.0, "timesteps_since_restore": 64800, "time_since_restore": 6670.112357854843, "time_this_iter_s": 132.23368000984192, "iterations_since_restore": 54}
+{"timesteps_total": 66000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 129046.204, "num_steps_sampled": 66000, "update_time_ms": 2.52, "num_steps_trained": 66000, "load_time_ms": 0.639, "default": {"kl": 0.01628641039133072, "cur_lr": 4.999999873689376e-05, "entropy": 17.494834899902344, "total_loss": 3003.40478515625, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.133028045296669, "vf_explained_var": -0.06501490622758865, "vf_loss": 3003.521240234375}, "grad_time_ms": 721.143}, "pid": 3934253, "time_total_s": 6808.767722606659, "episode_reward_mean": -204.17025147553716, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -239.4554018600887, "policy_reward_mean": {}, "episodes_total": 1320, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -174.49680020462705, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_16-29-59", "training_iteration": 55, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756477799, "episode_len_mean": 50.0, "timesteps_since_restore": 66000, "time_since_restore": 6808.767722606659, "time_this_iter_s": 138.6553647518158, "iterations_since_restore": 55}
+{"timesteps_total": 67200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 124872.43, "num_steps_sampled": 67200, "update_time_ms": 2.474, "num_steps_trained": 67200, "load_time_ms": 0.617, "default": {"kl": 0.018313659355044365, "cur_lr": 4.999999873689376e-05, "entropy": 17.51993751525879, "total_loss": 3227.48779296875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14256832003593445, "vf_explained_var": -0.046293098479509354, "vf_loss": 3227.612060546875}, "grad_time_ms": 724.948}, "pid": 3934253, "time_total_s": 6905.780424118042, "episode_reward_mean": -202.69798806398597, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -239.4554018600887, "policy_reward_mean": {}, "episodes_total": 1344, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -174.40608955184834, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_16-31-36", "training_iteration": 56, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756477896, "episode_len_mean": 50.0, "timesteps_since_restore": 67200, "time_since_restore": 6905.780424118042, "time_this_iter_s": 97.01270151138306, "iterations_since_restore": 56}
+{"timesteps_total": 68400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 124572.464, "num_steps_sampled": 68400, "update_time_ms": 2.557, "num_steps_trained": 68400, "load_time_ms": 0.617, "default": {"kl": 0.015577811747789383, "cur_lr": 4.999999873689376e-05, "entropy": 17.429256439208984, "total_loss": 2839.56689453125, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1422598659992218, "vf_explained_var": 0.25796782970428467, "vf_loss": 2839.693115234375}, "grad_time_ms": 725.939}, "pid": 3934253, "time_total_s": 7033.093000173569, "episode_reward_mean": -203.0628973147633, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -236.39727673502475, "policy_reward_mean": {}, "episodes_total": 1368, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -174.40608955184834, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_16-33-44", "training_iteration": 57, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756478024, "episode_len_mean": 50.0, "timesteps_since_restore": 68400, "time_since_restore": 7033.093000173569, "time_this_iter_s": 127.31257605552673, "iterations_since_restore": 57}
+{"timesteps_total": 69600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 127368.019, "num_steps_sampled": 69600, "update_time_ms": 2.587, "num_steps_trained": 69600, "load_time_ms": 0.644, "default": {"kl": 0.01565130613744259, "cur_lr": 4.999999873689376e-05, "entropy": 17.373971939086914, "total_loss": 3137.237548828125, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1458209902048111, "vf_explained_var": 0.18359674513339996, "vf_loss": 3137.36767578125}, "grad_time_ms": 699.675}, "pid": 3934253, "time_total_s": 7193.634396314621, "episode_reward_mean": -201.6865593275633, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -237.16475391834197, "policy_reward_mean": {}, "episodes_total": 1392, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -162.37320864558674, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_16-36-24", "training_iteration": 58, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756478184, "episode_len_mean": 50.0, "timesteps_since_restore": 69600, "time_since_restore": 7193.634396314621, "time_this_iter_s": 160.54139614105225, "iterations_since_restore": 58}
+{"timesteps_total": 70800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 126269.84, "num_steps_sampled": 70800, "update_time_ms": 2.615, "num_steps_trained": 70800, "load_time_ms": 0.643, "default": {"kl": 0.017643585801124573, "cur_lr": 4.999999873689376e-05, "entropy": 17.52008819580078, "total_loss": 2227.716064453125, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14641273021697998, "vf_explained_var": 0.3743492662906647, "vf_loss": 2227.8447265625}, "grad_time_ms": 696.813}, "pid": 3934253, "time_total_s": 7329.136283874512, "episode_reward_mean": -202.4014445057027, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -237.16475391834197, "policy_reward_mean": {}, "episodes_total": 1416, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -162.37320864558674, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_16-38-40", "training_iteration": 59, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756478320, "episode_len_mean": 50.0, "timesteps_since_restore": 70800, "time_since_restore": 7329.136283874512, "time_this_iter_s": 135.50188755989075, "iterations_since_restore": 59}
+{"timesteps_total": 72000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 126855.389, "num_steps_sampled": 72000, "update_time_ms": 2.553, "num_steps_trained": 72000, "load_time_ms": 0.641, "default": {"kl": 0.0166630856692791, "cur_lr": 4.999999873689376e-05, "entropy": 17.444067001342773, "total_loss": 2494.462646484375, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1459917575120926, "vf_explained_var": 0.32033035159111023, "vf_loss": 2494.591796875}, "grad_time_ms": 699.478}, "pid": 3934253, "time_total_s": 7468.560915708542, "episode_reward_mean": -203.28037131250483, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -242.41267235711027, "policy_reward_mean": {}, "episodes_total": 1440, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -162.37320864558674, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_16-40-59", "training_iteration": 60, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756478459, "episode_len_mean": 50.0, "timesteps_since_restore": 72000, "time_since_restore": 7468.560915708542, "time_this_iter_s": 139.42463183403015, "iterations_since_restore": 60}
+{"timesteps_total": 73200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 127224.631, "num_steps_sampled": 73200, "update_time_ms": 2.522, "num_steps_trained": 73200, "load_time_ms": 0.678, "default": {"kl": 0.0184915941208601, "cur_lr": 4.999999873689376e-05, "entropy": 17.35077667236328, "total_loss": 2228.360595703125, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.15222874283790588, "vf_explained_var": 0.32440924644470215, "vf_loss": 2228.493896484375}, "grad_time_ms": 692.227}, "pid": 3934253, "time_total_s": 7587.17391872406, "episode_reward_mean": -201.8855045823159, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -242.41267235711027, "policy_reward_mean": {}, "episodes_total": 1464, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -162.37320864558674, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_16-42-58", "training_iteration": 61, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756478578, "episode_len_mean": 50.0, "timesteps_since_restore": 73200, "time_since_restore": 7587.17391872406, "time_this_iter_s": 118.61300301551819, "iterations_since_restore": 61}
+{"timesteps_total": 74400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 129470.924, "num_steps_sampled": 74400, "update_time_ms": 2.492, "num_steps_trained": 74400, "load_time_ms": 0.668, "default": {"kl": 0.01812606118619442, "cur_lr": 4.999999873689376e-05, "entropy": 17.345386505126953, "total_loss": 1536.971435546875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14230865240097046, "vf_explained_var": 0.4932720363140106, "vf_loss": 1537.095458984375}, "grad_time_ms": 690.446}, "pid": 3934253, "time_total_s": 7724.547788619995, "episode_reward_mean": -203.61260778759706, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -242.41267235711027, "policy_reward_mean": {}, "episodes_total": 1488, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -167.70166226128026, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_16-45-15", "training_iteration": 62, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756478715, "episode_len_mean": 50.0, "timesteps_since_restore": 74400, "time_since_restore": 7724.547788619995, "time_this_iter_s": 137.37386989593506, "iterations_since_restore": 62}
+{"timesteps_total": 75600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 130895.803, "num_steps_sampled": 75600, "update_time_ms": 2.494, "num_steps_trained": 75600, "load_time_ms": 0.666, "default": {"kl": 0.018034812062978745, "cur_lr": 4.999999873689376e-05, "entropy": 17.338903427124023, "total_loss": 1522.239990234375, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.15658150613307953, "vf_explained_var": 0.5048775672912598, "vf_loss": 1522.37841796875}, "grad_time_ms": 689.206}, "pid": 3934253, "time_total_s": 7853.81393122673, "episode_reward_mean": -204.79683966833977, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -243.82422338554372, "policy_reward_mean": {}, "episodes_total": 1512, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -178.07009410244865, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_16-47-25", "training_iteration": 63, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756478845, "episode_len_mean": 50.0, "timesteps_since_restore": 75600, "time_since_restore": 7853.81393122673, "time_this_iter_s": 129.26614260673523, "iterations_since_restore": 63}
+{"timesteps_total": 76800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 128562.827, "num_steps_sampled": 76800, "update_time_ms": 2.455, "num_steps_trained": 76800, "load_time_ms": 0.668, "default": {"kl": 0.01748146489262581, "cur_lr": 4.999999873689376e-05, "entropy": 17.300491333007812, "total_loss": 1020.3151245117188, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1468556821346283, "vf_explained_var": 0.628902018070221, "vf_loss": 1020.4442138671875}, "grad_time_ms": 685.694}, "pid": 3934253, "time_total_s": 7962.6811876297, "episode_reward_mean": -204.60964781539147, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -243.82422338554372, "policy_reward_mean": {}, "episodes_total": 1536, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -176.72126537076102, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_16-49-14", "training_iteration": 64, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756478954, "episode_len_mean": 50.0, "timesteps_since_restore": 76800, "time_since_restore": 7962.6811876297, "time_this_iter_s": 108.86725640296936, "iterations_since_restore": 64}
+{"timesteps_total": 78000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 130065.401, "num_steps_sampled": 78000, "update_time_ms": 2.495, "num_steps_trained": 78000, "load_time_ms": 0.671, "default": {"kl": 0.01788967289030552, "cur_lr": 4.999999873689376e-05, "entropy": 17.299461364746094, "total_loss": 1186.3619384765625, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14163149893283844, "vf_explained_var": 0.5919825434684753, "vf_loss": 1186.4854736328125}, "grad_time_ms": 706.33}, "pid": 3934253, "time_total_s": 8116.570593595505, "episode_reward_mean": -206.1822075156246, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -243.82422338554372, "policy_reward_mean": {}, "episodes_total": 1560, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -176.72126537076102, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_16-51-47", "training_iteration": 65, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756479107, "episode_len_mean": 50.0, "timesteps_since_restore": 78000, "time_since_restore": 8116.570593595505, "time_this_iter_s": 153.88940596580505, "iterations_since_restore": 65}
+{"timesteps_total": 79200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 133131.583, "num_steps_sampled": 79200, "update_time_ms": 2.57, "num_steps_trained": 79200, "load_time_ms": 0.685, "default": {"kl": 0.017699653282761574, "cur_lr": 4.999999873689376e-05, "entropy": 17.267019271850586, "total_loss": 845.4718627929688, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1545608788728714, "vf_explained_var": 0.7317812442779541, "vf_loss": 845.6085815429688}, "grad_time_ms": 682.49}, "pid": 3934253, "time_total_s": 8244.00701546669, "episode_reward_mean": -204.67036275163156, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -243.82422338554372, "policy_reward_mean": {}, "episodes_total": 1584, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -176.72126537076102, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_16-53-55", "training_iteration": 66, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756479235, "episode_len_mean": 50.0, "timesteps_since_restore": 79200, "time_since_restore": 8244.00701546669, "time_this_iter_s": 127.4364218711853, "iterations_since_restore": 66}
+{"timesteps_total": 80400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 134628.525, "num_steps_sampled": 80400, "update_time_ms": 2.499, "num_steps_trained": 80400, "load_time_ms": 0.681, "default": {"kl": 0.018020590767264366, "cur_lr": 4.999999873689376e-05, "entropy": 17.243600845336914, "total_loss": 908.90869140625, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.15663856267929077, "vf_explained_var": 0.6649714708328247, "vf_loss": 909.047119140625}, "grad_time_ms": 684.762}, "pid": 3934253, "time_total_s": 8386.31137752533, "episode_reward_mean": -203.45399373806507, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -236.6240604926094, "policy_reward_mean": {}, "episodes_total": 1608, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -176.72126537076102, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_16-56-17", "training_iteration": 67, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756479377, "episode_len_mean": 50.0, "timesteps_since_restore": 80400, "time_since_restore": 8386.31137752533, "time_this_iter_s": 142.30436205863953, "iterations_since_restore": 67}
+{"timesteps_total": 81600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 127800.101, "num_steps_sampled": 81600, "update_time_ms": 2.453, "num_steps_trained": 81600, "load_time_ms": 0.659, "default": {"kl": 0.01779426634311676, "cur_lr": 4.999999873689376e-05, "entropy": 17.173952102661133, "total_loss": 923.4046020507812, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14696913957595825, "vf_explained_var": 0.6245005130767822, "vf_loss": 923.5335083007812}, "grad_time_ms": 714.332}, "pid": 3934253, "time_total_s": 8478.86295580864, "episode_reward_mean": -201.6099014965169, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -236.6240604926094, "policy_reward_mean": {}, "episodes_total": 1632, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -170.85541536790782, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_16-57-50", "training_iteration": 68, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756479470, "episode_len_mean": 50.0, "timesteps_since_restore": 81600, "time_since_restore": 8478.86295580864, "time_this_iter_s": 92.55157828330994, "iterations_since_restore": 68}
+{"timesteps_total": 82800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 127706.637, "num_steps_sampled": 82800, "update_time_ms": 2.508, "num_steps_trained": 82800, "load_time_ms": 0.664, "default": {"kl": 0.017506642267107964, "cur_lr": 4.999999873689376e-05, "entropy": 17.228662490844727, "total_loss": 955.548828125, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13391107320785522, "vf_explained_var": 0.6843433976173401, "vf_loss": 955.6649169921875}, "grad_time_ms": 717.92}, "pid": 3934253, "time_total_s": 8613.466737508774, "episode_reward_mean": -199.76279681389474, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -237.87893357886605, "policy_reward_mean": {}, "episodes_total": 1656, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -169.67883789220647, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_17-00-04", "training_iteration": 69, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756479604, "episode_len_mean": 50.0, "timesteps_since_restore": 82800, "time_since_restore": 8613.466737508774, "time_this_iter_s": 134.60378170013428, "iterations_since_restore": 69}
+{"timesteps_total": 84000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 128395.735, "num_steps_sampled": 84000, "update_time_ms": 2.614, "num_steps_trained": 84000, "load_time_ms": 0.666, "default": {"kl": 0.01776537112891674, "cur_lr": 4.999999873689376e-05, "entropy": 17.27729034423828, "total_loss": 858.9427490234375, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14407379925251007, "vf_explained_var": 0.6505129337310791, "vf_loss": 859.0687255859375}, "grad_time_ms": 718.019}, "pid": 3934253, "time_total_s": 8759.78401517868, "episode_reward_mean": -200.28976271340775, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -237.87893357886605, "policy_reward_mean": {}, "episodes_total": 1680, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -166.50156901737446, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_17-02-31", "training_iteration": 70, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756479751, "episode_len_mean": 50.0, "timesteps_since_restore": 84000, "time_since_restore": 8759.78401517868, "time_this_iter_s": 146.31727766990662, "iterations_since_restore": 70}
+{"timesteps_total": 85200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 127439.452, "num_steps_sampled": 85200, "update_time_ms": 2.607, "num_steps_trained": 85200, "load_time_ms": 0.634, "default": {"kl": 0.017438506707549095, "cur_lr": 4.999999873689376e-05, "entropy": 17.173513412475586, "total_loss": 497.2986145019531, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14505235850811005, "vf_explained_var": 0.8089240193367004, "vf_loss": 497.4259948730469}, "grad_time_ms": 715.196}, "pid": 3934253, "time_total_s": 8868.804517507553, "episode_reward_mean": -196.78183297555998, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -237.87893357886605, "policy_reward_mean": {}, "episodes_total": 1704, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -162.7454707928377, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_17-04-20", "training_iteration": 71, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756479860, "episode_len_mean": 50.0, "timesteps_since_restore": 85200, "time_since_restore": 8868.804517507553, "time_this_iter_s": 109.02050232887268, "iterations_since_restore": 71}
+{"timesteps_total": 86400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 126949.793, "num_steps_sampled": 86400, "update_time_ms": 2.563, "num_steps_trained": 86400, "load_time_ms": 0.632, "default": {"kl": 0.017478276044130325, "cur_lr": 4.999999873689376e-05, "entropy": 17.1412296295166, "total_loss": 537.347412109375, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1449865847826004, "vf_explained_var": 0.8066643476486206, "vf_loss": 537.4746704101562}, "grad_time_ms": 716.498}, "pid": 3934253, "time_total_s": 9001.29467010498, "episode_reward_mean": -196.7515997280192, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -237.87893357886605, "policy_reward_mean": {}, "episodes_total": 1728, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -162.7454707928377, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_17-06-32", "training_iteration": 72, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756479992, "episode_len_mean": 50.0, "timesteps_since_restore": 86400, "time_since_restore": 9001.29467010498, "time_this_iter_s": 132.49015259742737, "iterations_since_restore": 72}
+{"timesteps_total": 87600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 126478.744, "num_steps_sampled": 87600, "update_time_ms": 2.579, "num_steps_trained": 87600, "load_time_ms": 0.628, "default": {"kl": 0.016992026939988136, "cur_lr": 4.999999873689376e-05, "entropy": 17.1573486328125, "total_loss": 668.7613525390625, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.15834328532218933, "vf_explained_var": 0.7176796793937683, "vf_loss": 668.9024658203125}, "grad_time_ms": 720.878}, "pid": 3934253, "time_total_s": 9125.894088745117, "episode_reward_mean": -196.29779407045845, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -235.77930229587113, "policy_reward_mean": {}, "episodes_total": 1752, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -160.0539174982735, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_17-08-37", "training_iteration": 73, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756480117, "episode_len_mean": 50.0, "timesteps_since_restore": 87600, "time_since_restore": 9125.894088745117, "time_this_iter_s": 124.59941864013672, "iterations_since_restore": 73}
+{"timesteps_total": 88800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 127925.017, "num_steps_sampled": 88800, "update_time_ms": 2.58, "num_steps_trained": 88800, "load_time_ms": 0.638, "default": {"kl": 0.01787766069173813, "cur_lr": 4.999999873689376e-05, "entropy": 17.137168884277344, "total_loss": 539.1329956054688, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13451358675956726, "vf_explained_var": 0.7756462097167969, "vf_loss": 539.2493286132812}, "grad_time_ms": 745.174}, "pid": 3934253, "time_total_s": 9249.467748641968, "episode_reward_mean": -196.06748029454903, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -241.6755977787709, "policy_reward_mean": {}, "episodes_total": 1776, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -160.0539174982735, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_17-10-41", "training_iteration": 74, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756480241, "episode_len_mean": 50.0, "timesteps_since_restore": 88800, "time_since_restore": 9249.467748641968, "time_this_iter_s": 123.57365989685059, "iterations_since_restore": 74}
+{"timesteps_total": 90000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 124662.659, "num_steps_sampled": 90000, "update_time_ms": 2.552, "num_steps_trained": 90000, "load_time_ms": 0.646, "default": {"kl": 0.016736924648284912, "cur_lr": 4.999999873689376e-05, "entropy": 17.0623836517334, "total_loss": 505.48822021484375, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.141299769282341, "vf_explained_var": 0.784087598323822, "vf_loss": 505.6125793457031}, "grad_time_ms": 746.333}, "pid": 3934253, "time_total_s": 9370.744490146637, "episode_reward_mean": -195.75643804258007, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -241.6755977787709, "policy_reward_mean": {}, "episodes_total": 1800, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -160.0539174982735, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_17-12-42", "training_iteration": 75, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756480362, "episode_len_mean": 50.0, "timesteps_since_restore": 90000, "time_since_restore": 9370.744490146637, "time_this_iter_s": 121.27674150466919, "iterations_since_restore": 75}
+{"timesteps_total": 91200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 121177.374, "num_steps_sampled": 91200, "update_time_ms": 2.489, "num_steps_trained": 91200, "load_time_ms": 0.624, "default": {"kl": 0.018218128010630608, "cur_lr": 4.999999873689376e-05, "entropy": 17.026023864746094, "total_loss": 539.190673828125, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14476412534713745, "vf_explained_var": 0.7949026823043823, "vf_loss": 539.31689453125}, "grad_time_ms": 763.486}, "pid": 3934253, "time_total_s": 9463.499910831451, "episode_reward_mean": -197.00243101656838, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -241.6755977787709, "policy_reward_mean": {}, "episodes_total": 1824, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -160.0539174982735, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_17-14-15", "training_iteration": 76, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756480455, "episode_len_mean": 50.0, "timesteps_since_restore": 91200, "time_since_restore": 9463.499910831451, "time_this_iter_s": 92.75542068481445, "iterations_since_restore": 76}
+{"timesteps_total": 92400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 121798.333, "num_steps_sampled": 92400, "update_time_ms": 2.498, "num_steps_trained": 92400, "load_time_ms": 0.62, "default": {"kl": 0.017787037417292595, "cur_lr": 4.999999873689376e-05, "entropy": 17.07424545288086, "total_loss": 433.3902587890625, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1485578417778015, "vf_explained_var": 0.8211551308631897, "vf_loss": 433.52081298828125}, "grad_time_ms": 761.874}, "pid": 3934253, "time_total_s": 9611.997594594955, "episode_reward_mean": -196.73452598520976, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -241.6755977787709, "policy_reward_mean": {}, "episodes_total": 1848, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -164.4817344017371, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_17-16-43", "training_iteration": 77, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756480603, "episode_len_mean": 50.0, "timesteps_since_restore": 92400, "time_since_restore": 9611.997594594955, "time_this_iter_s": 148.49768376350403, "iterations_since_restore": 77}
+{"timesteps_total": 93600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 128079.512, "num_steps_sampled": 93600, "update_time_ms": 2.497, "num_steps_trained": 93600, "load_time_ms": 0.645, "default": {"kl": 0.01852409727871418, "cur_lr": 4.999999873689376e-05, "entropy": 17.058555603027344, "total_loss": 397.9156799316406, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.16270986199378967, "vf_explained_var": 0.8211359977722168, "vf_loss": 398.0596618652344}, "grad_time_ms": 752.817}, "pid": 3934253, "time_total_s": 9767.270104885101, "episode_reward_mean": -195.6968907137477, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -235.19605511971818, "policy_reward_mean": {}, "episodes_total": 1872, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -164.4817344017371, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_17-19-18", "training_iteration": 78, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756480758, "episode_len_mean": 50.0, "timesteps_since_restore": 93600, "time_since_restore": 9767.270104885101, "time_this_iter_s": 155.27251029014587, "iterations_since_restore": 78}
+{"timesteps_total": 94800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 124008.762, "num_steps_sampled": 94800, "update_time_ms": 2.44, "num_steps_trained": 94800, "load_time_ms": 0.644, "default": {"kl": 0.01609645038843155, "cur_lr": 4.999999873689376e-05, "entropy": 17.04368019104004, "total_loss": 314.5567626953125, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.15425236523151398, "vf_explained_var": 0.8700137138366699, "vf_loss": 314.6947021484375}, "grad_time_ms": 759.731}, "pid": 3934253, "time_total_s": 9861.234502792358, "episode_reward_mean": -196.28833283553197, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -235.19605511971818, "policy_reward_mean": {}, "episodes_total": 1896, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -164.4817344017371, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_17-20-52", "training_iteration": 79, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756480852, "episode_len_mean": 50.0, "timesteps_since_restore": 94800, "time_since_restore": 9861.234502792358, "time_this_iter_s": 93.96439790725708, "iterations_since_restore": 79}
+{"timesteps_total": 96000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 119963.043, "num_steps_sampled": 96000, "update_time_ms": 2.322, "num_steps_trained": 96000, "load_time_ms": 0.648, "default": {"kl": 0.017412256449460983, "cur_lr": 4.999999873689376e-05, "entropy": 16.991172790527344, "total_loss": 300.1842956542969, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.15490752458572388, "vf_explained_var": 0.8661372661590576, "vf_loss": 300.32159423828125}, "grad_time_ms": 746.461}, "pid": 3934253, "time_total_s": 9966.960909605026, "episode_reward_mean": -195.29564945059207, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -235.19605511971818, "policy_reward_mean": {}, "episodes_total": 1920, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -159.59112747436288, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_17-22-38", "training_iteration": 80, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756480958, "episode_len_mean": 50.0, "timesteps_since_restore": 96000, "time_since_restore": 9966.960909605026, "time_this_iter_s": 105.72640681266785, "iterations_since_restore": 80}
+{"timesteps_total": 97200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 123085.136, "num_steps_sampled": 97200, "update_time_ms": 2.358, "num_steps_trained": 97200, "load_time_ms": 0.65, "default": {"kl": 0.018388399854302406, "cur_lr": 4.999999873689376e-05, "entropy": 16.957988739013672, "total_loss": 435.7005310058594, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.15358111262321472, "vf_explained_var": 0.8013516068458557, "vf_loss": 435.8354797363281}, "grad_time_ms": 738.744}, "pid": 3934253, "time_total_s": 10107.12469124794, "episode_reward_mean": -195.21138806142923, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -238.3792524057925, "policy_reward_mean": {}, "episodes_total": 1944, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -159.59112747436288, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_17-24-58", "training_iteration": 81, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756481098, "episode_len_mean": 50.0, "timesteps_since_restore": 97200, "time_since_restore": 10107.12469124794, "time_this_iter_s": 140.16378164291382, "iterations_since_restore": 81}
+{"timesteps_total": 98400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 122775.768, "num_steps_sampled": 98400, "update_time_ms": 2.377, "num_steps_trained": 98400, "load_time_ms": 0.669, "default": {"kl": 0.01676376722753048, "cur_lr": 4.999999873689376e-05, "entropy": 16.881912231445312, "total_loss": 455.4905700683594, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.15382982790470123, "vf_explained_var": 0.7882832884788513, "vf_loss": 455.62738037109375}, "grad_time_ms": 733.703}, "pid": 3934253, "time_total_s": 10236.471656560898, "episode_reward_mean": -193.51017683169036, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -238.3792524057925, "policy_reward_mean": {}, "episodes_total": 1968, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -159.59112747436288, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_17-27-08", "training_iteration": 82, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756481228, "episode_len_mean": 50.0, "timesteps_since_restore": 98400, "time_since_restore": 10236.471656560898, "time_this_iter_s": 129.34696531295776, "iterations_since_restore": 82}
+{"timesteps_total": 99600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 122992.217, "num_steps_sampled": 99600, "update_time_ms": 2.418, "num_steps_trained": 99600, "load_time_ms": 0.667, "default": {"kl": 0.016289807856082916, "cur_lr": 4.999999873689376e-05, "entropy": 16.883893966674805, "total_loss": 278.7664794921875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14257968962192535, "vf_explained_var": 0.8755154013633728, "vf_loss": 278.8926086425781}, "grad_time_ms": 718.016}, "pid": 3934253, "time_total_s": 10363.079635858536, "episode_reward_mean": -193.8101555905133, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -238.3792524057925, "policy_reward_mean": {}, "episodes_total": 1992, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -159.59112747436288, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_17-29-14", "training_iteration": 83, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756481354, "episode_len_mean": 50.0, "timesteps_since_restore": 99600, "time_since_restore": 10363.079635858536, "time_this_iter_s": 126.60797929763794, "iterations_since_restore": 83}
+{"timesteps_total": 100800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 121729.185, "num_steps_sampled": 100800, "update_time_ms": 2.458, "num_steps_trained": 100800, "load_time_ms": 0.657, "default": {"kl": 0.01808132603764534, "cur_lr": 4.999999873689376e-05, "entropy": 16.88953399658203, "total_loss": 364.16656494140625, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1611376404762268, "vf_explained_var": 0.8535504341125488, "vf_loss": 364.30938720703125}, "grad_time_ms": 689.813}, "pid": 3934253, "time_total_s": 10473.741010189056, "episode_reward_mean": -194.8083754000186, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -244.0834730499058, "policy_reward_mean": {}, "episodes_total": 2016, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -158.90891938732824, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_17-31-05", "training_iteration": 84, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756481465, "episode_len_mean": 50.0, "timesteps_since_restore": 100800, "time_since_restore": 10473.741010189056, "time_this_iter_s": 110.66137433052063, "iterations_since_restore": 84}
+{"timesteps_total": 102000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 121568.223, "num_steps_sampled": 102000, "update_time_ms": 2.407, "num_steps_trained": 102000, "load_time_ms": 0.648, "default": {"kl": 0.016419248655438423, "cur_lr": 4.999999873689376e-05, "entropy": 16.816686630249023, "total_loss": 237.03546142578125, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14151573181152344, "vf_explained_var": 0.8933451175689697, "vf_loss": 237.16033935546875}, "grad_time_ms": 673.73}, "pid": 3934253, "time_total_s": 10593.246505260468, "episode_reward_mean": -192.0402432573778, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -244.0834730499058, "policy_reward_mean": {}, "episodes_total": 2040, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -158.90891938732824, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_17-33-04", "training_iteration": 85, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756481584, "episode_len_mean": 50.0, "timesteps_since_restore": 102000, "time_since_restore": 10593.246505260468, "time_this_iter_s": 119.50549507141113, "iterations_since_restore": 85}
+{"timesteps_total": 103200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 126660.424, "num_steps_sampled": 103200, "update_time_ms": 2.477, "num_steps_trained": 103200, "load_time_ms": 0.651, "default": {"kl": 0.017432495951652527, "cur_lr": 4.999999873689376e-05, "entropy": 16.767534255981445, "total_loss": 315.4970397949219, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14589375257492065, "vf_explained_var": 0.8593595027923584, "vf_loss": 315.6252746582031}, "grad_time_ms": 668.927}, "pid": 3934253, "time_total_s": 10736.875820159912, "episode_reward_mean": -192.24686534121082, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -244.0834730499058, "policy_reward_mean": {}, "episodes_total": 2064, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -156.46359577259705, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_17-35-28", "training_iteration": 86, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756481728, "episode_len_mean": 50.0, "timesteps_since_restore": 103200, "time_since_restore": 10736.875820159912, "time_this_iter_s": 143.62931489944458, "iterations_since_restore": 86}
+{"timesteps_total": 104400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 125743.944, "num_steps_sampled": 104400, "update_time_ms": 2.492, "num_steps_trained": 104400, "load_time_ms": 0.656, "default": {"kl": 0.017669349908828735, "cur_lr": 4.999999873689376e-05, "entropy": 16.812036514282227, "total_loss": 267.5138854980469, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.15393000841140747, "vf_explained_var": 0.8753257989883423, "vf_loss": 267.64990234375}, "grad_time_ms": 668.674}, "pid": 3934253, "time_total_s": 10876.206056833267, "episode_reward_mean": -191.51076350055698, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -244.0834730499058, "policy_reward_mean": {}, "episodes_total": 2088, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -156.46359577259705, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_17-37-47", "training_iteration": 87, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756481867, "episode_len_mean": 50.0, "timesteps_since_restore": 104400, "time_since_restore": 10876.206056833267, "time_this_iter_s": 139.3302366733551, "iterations_since_restore": 87}
+{"timesteps_total": 105600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 120972.7, "num_steps_sampled": 105600, "update_time_ms": 2.506, "num_steps_trained": 105600, "load_time_ms": 0.623, "default": {"kl": 0.01689998432993889, "cur_lr": 4.999999873689376e-05, "entropy": 16.69136619567871, "total_loss": 439.05120849609375, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1567053645849228, "vf_explained_var": 0.805030107498169, "vf_loss": 439.1907958984375}, "grad_time_ms": 681.062}, "pid": 3934253, "time_total_s": 10983.889906644821, "episode_reward_mean": -189.84582066774183, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -240.50411683754677, "policy_reward_mean": {}, "episodes_total": 2112, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -152.51348529183588, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_17-39-35", "training_iteration": 88, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756481975, "episode_len_mean": 50.0, "timesteps_since_restore": 105600, "time_since_restore": 10983.889906644821, "time_this_iter_s": 107.68384981155396, "iterations_since_restore": 88}
+{"timesteps_total": 106800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 123934.882, "num_steps_sampled": 106800, "update_time_ms": 2.49, "num_steps_trained": 106800, "load_time_ms": 0.626, "default": {"kl": 0.01723390817642212, "cur_lr": 4.999999873689376e-05, "entropy": 16.839763641357422, "total_loss": 313.3089294433594, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14374259114265442, "vf_explained_var": 0.8504605889320374, "vf_loss": 313.4351806640625}, "grad_time_ms": 669.345}, "pid": 3934253, "time_total_s": 11107.359429359436, "episode_reward_mean": -190.61900295321735, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -240.50411683754677, "policy_reward_mean": {}, "episodes_total": 2136, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -152.51348529183588, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_17-41-39", "training_iteration": 89, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756482099, "episode_len_mean": 50.0, "timesteps_since_restore": 106800, "time_since_restore": 11107.359429359436, "time_this_iter_s": 123.46952271461487, "iterations_since_restore": 89}
+{"timesteps_total": 108000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 126152.382, "num_steps_sampled": 108000, "update_time_ms": 2.516, "num_steps_trained": 108000, "load_time_ms": 0.623, "default": {"kl": 0.01680140011012554, "cur_lr": 4.999999873689376e-05, "entropy": 16.745079040527344, "total_loss": 342.0080871582031, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.15599854290485382, "vf_explained_var": 0.8507482409477234, "vf_loss": 342.14703369140625}, "grad_time_ms": 659.328}, "pid": 3934253, "time_total_s": 11235.161835432053, "episode_reward_mean": -190.20998737125626, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -240.50411683754677, "policy_reward_mean": {}, "episodes_total": 2160, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -152.51348529183588, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_17-43-46", "training_iteration": 90, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756482226, "episode_len_mean": 50.0, "timesteps_since_restore": 108000, "time_since_restore": 11235.161835432053, "time_this_iter_s": 127.80240607261658, "iterations_since_restore": 90}
+{"timesteps_total": 109200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 124292.415, "num_steps_sampled": 109200, "update_time_ms": 2.439, "num_steps_trained": 109200, "load_time_ms": 0.617, "default": {"kl": 0.015435642562806606, "cur_lr": 4.999999873689376e-05, "entropy": 16.718061447143555, "total_loss": 403.6151123046875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.129756897687912, "vf_explained_var": 0.8099173903465271, "vf_loss": 403.729248046875}, "grad_time_ms": 640.826}, "pid": 3934253, "time_total_s": 11356.541090488434, "episode_reward_mean": -189.15599179625715, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -240.14707734147564, "policy_reward_mean": {}, "episodes_total": 2184, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -152.51348529183588, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_17-45-48", "training_iteration": 91, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756482348, "episode_len_mean": 50.0, "timesteps_since_restore": 109200, "time_since_restore": 11356.541090488434, "time_this_iter_s": 121.37925505638123, "iterations_since_restore": 91}
+{"timesteps_total": 110400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 121373.638, "num_steps_sampled": 110400, "update_time_ms": 2.509, "num_steps_trained": 110400, "load_time_ms": 0.598, "default": {"kl": 0.015910038724541664, "cur_lr": 4.999999873689376e-05, "entropy": 16.68692970275879, "total_loss": 359.96844482421875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.15589465200901031, "vf_explained_var": 0.8525227904319763, "vf_loss": 360.1082458496094}, "grad_time_ms": 650.758}, "pid": 3934253, "time_total_s": 11456.798621892929, "episode_reward_mean": -185.9974027787964, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -240.14707734147564, "policy_reward_mean": {}, "episodes_total": 2208, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -152.51348529183588, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_17-47-28", "training_iteration": 92, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756482448, "episode_len_mean": 50.0, "timesteps_since_restore": 110400, "time_since_restore": 11456.798621892929, "time_this_iter_s": 100.25753140449524, "iterations_since_restore": 92}
+{"timesteps_total": 111600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 120579.803, "num_steps_sampled": 111600, "update_time_ms": 2.452, "num_steps_trained": 111600, "load_time_ms": 0.598, "default": {"kl": 0.016870131716132164, "cur_lr": 4.999999873689376e-05, "entropy": 16.627105712890625, "total_loss": 202.50332641601562, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13515739142894745, "vf_explained_var": 0.9027056097984314, "vf_loss": 202.62139892578125}, "grad_time_ms": 664.662}, "pid": 3934253, "time_total_s": 11575.607246160507, "episode_reward_mean": -184.28075541258278, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -240.14707734147564, "policy_reward_mean": {}, "episodes_total": 2232, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -156.2375228182839, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_17-49-27", "training_iteration": 93, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756482567, "episode_len_mean": 50.0, "timesteps_since_restore": 111600, "time_since_restore": 11575.607246160507, "time_this_iter_s": 118.80862426757812, "iterations_since_restore": 93}
+{"timesteps_total": 112800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 118513.786, "num_steps_sampled": 112800, "update_time_ms": 2.458, "num_steps_trained": 112800, "load_time_ms": 0.604, "default": {"kl": 0.01635323092341423, "cur_lr": 4.999999873689376e-05, "entropy": 16.571773529052734, "total_loss": 202.59608459472656, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13777993619441986, "vf_explained_var": 0.8907999396324158, "vf_loss": 202.71730041503906}, "grad_time_ms": 689.211}, "pid": 3934253, "time_total_s": 11665.854831933975, "episode_reward_mean": -183.06815936431977, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -230.12884374648553, "policy_reward_mean": {}, "episodes_total": 2256, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -155.35197419791174, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_17-50-57", "training_iteration": 94, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756482657, "episode_len_mean": 50.0, "timesteps_since_restore": 112800, "time_since_restore": 11665.854831933975, "time_this_iter_s": 90.24758577346802, "iterations_since_restore": 94}
+{"timesteps_total": 114000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 118112.503, "num_steps_sampled": 114000, "update_time_ms": 2.541, "num_steps_trained": 114000, "load_time_ms": 0.638, "default": {"kl": 0.01679901033639908, "cur_lr": 4.999999873689376e-05, "entropy": 16.693180084228516, "total_loss": 392.57073974609375, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.15203702449798584, "vf_explained_var": 0.8344202637672424, "vf_loss": 392.70574951171875}, "grad_time_ms": 689.018}, "pid": 3934253, "time_total_s": 11781.346488714218, "episode_reward_mean": -184.8951815855976, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -236.2370975894316, "policy_reward_mean": {}, "episodes_total": 2280, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -155.35197419791174, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_17-52-53", "training_iteration": 95, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756482773, "episode_len_mean": 50.0, "timesteps_since_restore": 114000, "time_since_restore": 11781.346488714218, "time_this_iter_s": 115.49165678024292, "iterations_since_restore": 95}
+{"timesteps_total": 115200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 113381.279, "num_steps_sampled": 115200, "update_time_ms": 2.48, "num_steps_trained": 115200, "load_time_ms": 0.636, "default": {"kl": 0.017182350158691406, "cur_lr": 4.999999873689376e-05, "entropy": 16.59419822692871, "total_loss": 327.04345703125, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14678317308425903, "vf_explained_var": 0.8305256366729736, "vf_loss": 327.1728515625}, "grad_time_ms": 690.153}, "pid": 3934253, "time_total_s": 11877.674539804459, "episode_reward_mean": -184.5234958852344, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -236.2370975894316, "policy_reward_mean": {}, "episodes_total": 2304, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -153.70410475921176, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_17-54-29", "training_iteration": 96, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756482869, "episode_len_mean": 50.0, "timesteps_since_restore": 115200, "time_since_restore": 11877.674539804459, "time_this_iter_s": 96.32805109024048, "iterations_since_restore": 96}
+{"timesteps_total": 116400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 113362.315, "num_steps_sampled": 116400, "update_time_ms": 2.472, "num_steps_trained": 116400, "load_time_ms": 0.636, "default": {"kl": 0.017371561378240585, "cur_lr": 4.999999873689376e-05, "entropy": 16.490705490112305, "total_loss": 211.58644104003906, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14773549139499664, "vf_explained_var": 0.8857764005661011, "vf_loss": 211.7165985107422}, "grad_time_ms": 678.808}, "pid": 3934253, "time_total_s": 12016.701777458191, "episode_reward_mean": -182.23667207649603, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -236.2370975894316, "policy_reward_mean": {}, "episodes_total": 2328, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -153.70410475921176, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_17-56-48", "training_iteration": 97, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756483008, "episode_len_mean": 50.0, "timesteps_since_restore": 116400, "time_since_restore": 12016.701777458191, "time_this_iter_s": 139.0272376537323, "iterations_since_restore": 97}
+{"timesteps_total": 117600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 113857.263, "num_steps_sampled": 117600, "update_time_ms": 2.434, "num_steps_trained": 117600, "load_time_ms": 0.639, "default": {"kl": 0.015952367335557938, "cur_lr": 4.999999873689376e-05, "entropy": 16.48573112487793, "total_loss": 260.2500915527344, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12603165209293365, "vf_explained_var": 0.8630385994911194, "vf_loss": 260.3599853515625}, "grad_time_ms": 671.948}, "pid": 3934253, "time_total_s": 12129.268003940582, "episode_reward_mean": -181.74283275609204, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -236.2370975894316, "policy_reward_mean": {}, "episodes_total": 2352, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -153.70410475921176, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_17-58-41", "training_iteration": 98, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756483121, "episode_len_mean": 50.0, "timesteps_since_restore": 117600, "time_since_restore": 12129.268003940582, "time_this_iter_s": 112.56622648239136, "iterations_since_restore": 98}
+{"timesteps_total": 118800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 111223.736, "num_steps_sampled": 118800, "update_time_ms": 2.448, "num_steps_trained": 118800, "load_time_ms": 0.636, "default": {"kl": 0.01815967448055744, "cur_lr": 4.999999873689376e-05, "entropy": 16.53923225402832, "total_loss": 143.78089904785156, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.15580715239048004, "vf_explained_var": 0.9172838926315308, "vf_loss": 143.91831970214844}, "grad_time_ms": 669.875}, "pid": 3934253, "time_total_s": 12226.381784915924, "episode_reward_mean": -177.3244781328566, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -223.9890509880485, "policy_reward_mean": {}, "episodes_total": 2376, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -153.70410475921176, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_18-00-18", "training_iteration": 99, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756483218, "episode_len_mean": 50.0, "timesteps_since_restore": 118800, "time_since_restore": 12226.381784915924, "time_this_iter_s": 97.1137809753418, "iterations_since_restore": 99}
+{"timesteps_total": 120000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 110873.774, "num_steps_sampled": 120000, "update_time_ms": 2.437, "num_steps_trained": 120000, "load_time_ms": 0.633, "default": {"kl": 0.016468364745378494, "cur_lr": 4.999999873689376e-05, "entropy": 16.499685287475586, "total_loss": 194.18292236328125, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14151686429977417, "vf_explained_var": 0.9113339185714722, "vf_loss": 194.30775451660156}, "grad_time_ms": 686.587}, "pid": 3934253, "time_total_s": 12350.85043144226, "episode_reward_mean": -177.8404594305838, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -223.9890509880485, "policy_reward_mean": {}, "episodes_total": 2400, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -153.55256333374888, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_18-02-22", "training_iteration": 100, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756483342, "episode_len_mean": 50.0, "timesteps_since_restore": 120000, "time_since_restore": 12350.85043144226, "time_this_iter_s": 124.46864652633667, "iterations_since_restore": 100}
+{"timesteps_total": 121200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 111372.389, "num_steps_sampled": 121200, "update_time_ms": 2.463, "num_steps_trained": 121200, "load_time_ms": 0.638, "default": {"kl": 0.015280604362487793, "cur_lr": 4.999999873689376e-05, "entropy": 16.443017959594727, "total_loss": 351.51165771484375, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14789825677871704, "vf_explained_var": 0.8464590311050415, "vf_loss": 351.64410400390625}, "grad_time_ms": 706.421}, "pid": 3934253, "time_total_s": 12477.413677215576, "episode_reward_mean": -177.50363631361705, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -237.2865543757983, "policy_reward_mean": {}, "episodes_total": 2424, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -153.55256333374888, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_18-04-29", "training_iteration": 101, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756483469, "episode_len_mean": 50.0, "timesteps_since_restore": 121200, "time_since_restore": 12477.413677215576, "time_this_iter_s": 126.56324577331543, "iterations_since_restore": 101}
+{"timesteps_total": 122400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 116905.836, "num_steps_sampled": 122400, "update_time_ms": 2.469, "num_steps_trained": 122400, "load_time_ms": 0.641, "default": {"kl": 0.017815299332141876, "cur_lr": 4.999999873689376e-05, "entropy": 16.31475067138672, "total_loss": 196.4730682373047, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1293335109949112, "vf_explained_var": 0.8965740203857422, "vf_loss": 196.5843505859375}, "grad_time_ms": 698.142}, "pid": 3934253, "time_total_s": 12632.923156023026, "episode_reward_mean": -176.86709660749798, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -237.2865543757983, "policy_reward_mean": {}, "episodes_total": 2448, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -152.76490594743353, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_18-07-04", "training_iteration": 102, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756483624, "episode_len_mean": 50.0, "timesteps_since_restore": 122400, "time_since_restore": 12632.923156023026, "time_this_iter_s": 155.50947880744934, "iterations_since_restore": 102}
+{"timesteps_total": 123600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 113790.619, "num_steps_sampled": 123600, "update_time_ms": 2.447, "num_steps_trained": 123600, "load_time_ms": 0.645, "default": {"kl": 0.01563744992017746, "cur_lr": 4.999999873689376e-05, "entropy": 16.338083267211914, "total_loss": 151.3992919921875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14109545946121216, "vf_explained_var": 0.9077298045158386, "vf_loss": 151.52456665039062}, "grad_time_ms": 691.221}, "pid": 3934253, "time_total_s": 12720.509969711304, "episode_reward_mean": -175.99193290191877, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -237.2865543757983, "policy_reward_mean": {}, "episodes_total": 2472, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -147.00338003430244, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_18-08-32", "training_iteration": 103, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756483712, "episode_len_mean": 50.0, "timesteps_since_restore": 123600, "time_since_restore": 12720.509969711304, "time_this_iter_s": 87.5868136882782, "iterations_since_restore": 103}
+{"timesteps_total": 124800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 114789.433, "num_steps_sampled": 124800, "update_time_ms": 2.446, "num_steps_trained": 124800, "load_time_ms": 0.643, "default": {"kl": 0.016803696751594543, "cur_lr": 4.999999873689376e-05, "entropy": 16.251419067382812, "total_loss": 151.03599548339844, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13679622113704681, "vf_explained_var": 0.8967797756195068, "vf_loss": 151.15579223632812}, "grad_time_ms": 680.611}, "pid": 3934253, "time_total_s": 12820.63918542862, "episode_reward_mean": -172.28727233323306, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -237.2865543757983, "policy_reward_mean": {}, "episodes_total": 2496, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -147.00338003430244, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_18-10-12", "training_iteration": 104, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756483812, "episode_len_mean": 50.0, "timesteps_since_restore": 124800, "time_since_restore": 12820.63918542862, "time_this_iter_s": 100.12921571731567, "iterations_since_restore": 104}
+{"timesteps_total": 126000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 116485.609, "num_steps_sampled": 126000, "update_time_ms": 2.39, "num_steps_trained": 126000, "load_time_ms": 0.609, "default": {"kl": 0.016549859195947647, "cur_lr": 4.999999873689376e-05, "entropy": 16.36214256286621, "total_loss": 301.8638916015625, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1474459022283554, "vf_explained_var": 0.866607666015625, "vf_loss": 301.99456787109375}, "grad_time_ms": 696.338}, "pid": 3934253, "time_total_s": 12953.248711824417, "episode_reward_mean": -173.19153721824375, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -228.24084146483688, "policy_reward_mean": {}, "episodes_total": 2520, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -147.00338003430244, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_18-12-25", "training_iteration": 105, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756483945, "episode_len_mean": 50.0, "timesteps_since_restore": 126000, "time_since_restore": 12953.248711824417, "time_this_iter_s": 132.60952639579773, "iterations_since_restore": 105}
+{"timesteps_total": 127200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 116791.533, "num_steps_sampled": 127200, "update_time_ms": 2.492, "num_steps_trained": 127200, "load_time_ms": 0.608, "default": {"kl": 0.01571556180715561, "cur_lr": 4.999999873689376e-05, "entropy": 16.307790756225586, "total_loss": 150.79681396484375, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1507481187582016, "vf_explained_var": 0.9222152233123779, "vf_loss": 150.93165588378906}, "grad_time_ms": 712.378}, "pid": 3934253, "time_total_s": 13052.797505378723, "episode_reward_mean": -172.00315892886397, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -228.24084146483688, "policy_reward_mean": {}, "episodes_total": 2544, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -147.00338003430244, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_18-14-04", "training_iteration": 106, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756484044, "episode_len_mean": 50.0, "timesteps_since_restore": 127200, "time_since_restore": 13052.797505378723, "time_this_iter_s": 99.54879355430603, "iterations_since_restore": 106}
+{"timesteps_total": 128400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 114973.748, "num_steps_sampled": 128400, "update_time_ms": 2.506, "num_steps_trained": 128400, "load_time_ms": 0.604, "default": {"kl": 0.016194190829992294, "cur_lr": 4.999999873689376e-05, "entropy": 16.16952896118164, "total_loss": 188.38453674316406, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12826089560985565, "vf_explained_var": 0.8953073024749756, "vf_loss": 188.4963836669922}, "grad_time_ms": 717.54}, "pid": 3934253, "time_total_s": 13173.698773622513, "episode_reward_mean": -171.93660035227012, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -228.24084146483688, "policy_reward_mean": {}, "episodes_total": 2568, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -154.05940271714744, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_18-16-05", "training_iteration": 107, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756484165, "episode_len_mean": 50.0, "timesteps_since_restore": 128400, "time_since_restore": 13173.698773622513, "time_this_iter_s": 120.90126824378967, "iterations_since_restore": 107}
+{"timesteps_total": 129600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 113312.925, "num_steps_sampled": 129600, "update_time_ms": 2.55, "num_steps_trained": 129600, "load_time_ms": 0.605, "default": {"kl": 0.015851590782403946, "cur_lr": 4.999999873689376e-05, "entropy": 16.239519119262695, "total_loss": 147.88504028320312, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13959604501724243, "vf_explained_var": 0.9218350648880005, "vf_loss": 148.0085906982422}, "grad_time_ms": 717.604}, "pid": 3934253, "time_total_s": 13269.655487060547, "episode_reward_mean": -172.38800804952464, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -228.24084146483688, "policy_reward_mean": {}, "episodes_total": 2592, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -154.05940271714744, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_18-17-41", "training_iteration": 108, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756484261, "episode_len_mean": 50.0, "timesteps_since_restore": 129600, "time_since_restore": 13269.655487060547, "time_this_iter_s": 95.95671343803406, "iterations_since_restore": 108}
+{"timesteps_total": 130800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 117926.066, "num_steps_sampled": 130800, "update_time_ms": 2.534, "num_steps_trained": 130800, "load_time_ms": 0.605, "default": {"kl": 0.017764806747436523, "cur_lr": 4.999999873689376e-05, "entropy": 16.124168395996094, "total_loss": 137.75177001953125, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1361183226108551, "vf_explained_var": 0.9212970733642578, "vf_loss": 137.86990356445312}, "grad_time_ms": 713.76}, "pid": 3934253, "time_total_s": 13412.86143398285, "episode_reward_mean": -170.3833210749433, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -228.24084146483688, "policy_reward_mean": {}, "episodes_total": 2616, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -152.87937694663307, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_18-20-04", "training_iteration": 109, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756484404, "episode_len_mean": 50.0, "timesteps_since_restore": 130800, "time_since_restore": 13412.86143398285, "time_this_iter_s": 143.20594692230225, "iterations_since_restore": 109}
+{"timesteps_total": 132000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 115004.932, "num_steps_sampled": 132000, "update_time_ms": 2.5, "num_steps_trained": 132000, "load_time_ms": 0.606, "default": {"kl": 0.01462532114237547, "cur_lr": 4.999999873689376e-05, "entropy": 16.235450744628906, "total_loss": 312.9838562011719, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12912686169147491, "vf_explained_var": 0.8360607624053955, "vf_loss": 313.09820556640625}, "grad_time_ms": 720.145}, "pid": 3934253, "time_total_s": 13508.183268070221, "episode_reward_mean": -170.65942585523808, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -235.71808497253244, "policy_reward_mean": {}, "episodes_total": 2640, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -152.87937694663307, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_18-21-40", "training_iteration": 110, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756484500, "episode_len_mean": 50.0, "timesteps_since_restore": 132000, "time_since_restore": 13508.183268070221, "time_this_iter_s": 95.32183408737183, "iterations_since_restore": 110}
+{"timesteps_total": 133200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 110919.135, "num_steps_sampled": 133200, "update_time_ms": 2.516, "num_steps_trained": 133200, "load_time_ms": 0.602, "default": {"kl": 0.015565955080091953, "cur_lr": 4.999999873689376e-05, "entropy": 16.11193084716797, "total_loss": 167.30422973632812, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12671928107738495, "vf_explained_var": 0.8889510035514832, "vf_loss": 167.4152069091797}, "grad_time_ms": 730.481}, "pid": 3934253, "time_total_s": 13593.992814540863, "episode_reward_mean": -169.53994936434026, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -235.71808497253244, "policy_reward_mean": {}, "episodes_total": 2664, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -151.91960658986196, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_18-23-06", "training_iteration": 111, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756484586, "episode_len_mean": 50.0, "timesteps_since_restore": 133200, "time_since_restore": 13593.992814540863, "time_this_iter_s": 85.80954647064209, "iterations_since_restore": 111}
+{"timesteps_total": 134400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 106473.289, "num_steps_sampled": 134400, "update_time_ms": 2.471, "num_steps_trained": 134400, "load_time_ms": 0.613, "default": {"kl": 0.016392739489674568, "cur_lr": 4.999999873689376e-05, "entropy": 16.099382400512695, "total_loss": 139.86541748046875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13682633638381958, "vf_explained_var": 0.9135033488273621, "vf_loss": 139.98565673828125}, "grad_time_ms": 731.735}, "pid": 3934253, "time_total_s": 13705.056573867798, "episode_reward_mean": -169.04459473864682, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -235.71808497253244, "policy_reward_mean": {}, "episodes_total": 2688, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -151.91960658986196, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_18-24-57", "training_iteration": 112, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756484697, "episode_len_mean": 50.0, "timesteps_since_restore": 134400, "time_since_restore": 13705.056573867798, "time_this_iter_s": 111.06375932693481, "iterations_since_restore": 112}
+{"timesteps_total": 135600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 110040.376, "num_steps_sampled": 135600, "update_time_ms": 2.501, "num_steps_trained": 135600, "load_time_ms": 0.617, "default": {"kl": 0.013566691428422928, "cur_lr": 4.999999873689376e-05, "entropy": 16.00737762451172, "total_loss": 143.7192840576172, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10995927453041077, "vf_explained_var": 0.9082484841346741, "vf_loss": 143.8155059814453}, "grad_time_ms": 731.457}, "pid": 3934253, "time_total_s": 13828.311593294144, "episode_reward_mean": -167.68415176884224, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -235.71808497253244, "policy_reward_mean": {}, "episodes_total": 2712, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -151.91960658986196, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_18-27-00", "training_iteration": 113, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756484820, "episode_len_mean": 50.0, "timesteps_since_restore": 135600, "time_since_restore": 13828.311593294144, "time_this_iter_s": 123.25501942634583, "iterations_since_restore": 113}
+{"timesteps_total": 136800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 111408.427, "num_steps_sampled": 136800, "update_time_ms": 2.489, "num_steps_trained": 136800, "load_time_ms": 0.623, "default": {"kl": 0.01726832240819931, "cur_lr": 4.999999873689376e-05, "entropy": 16.0635929107666, "total_loss": 108.0864486694336, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.15256041288375854, "vf_explained_var": 0.9264135360717773, "vf_loss": 108.22151947021484}, "grad_time_ms": 730.998}, "pid": 3934253, "time_total_s": 13942.116770505905, "episode_reward_mean": -164.90202950385196, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -210.5466717526865, "policy_reward_mean": {}, "episodes_total": 2736, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -151.91960658986196, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_18-28-54", "training_iteration": 114, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756484934, "episode_len_mean": 50.0, "timesteps_since_restore": 136800, "time_since_restore": 13942.116770505905, "time_this_iter_s": 113.80517721176147, "iterations_since_restore": 114}
+{"timesteps_total": 138000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 108091.929, "num_steps_sampled": 138000, "update_time_ms": 2.5, "num_steps_trained": 138000, "load_time_ms": 0.642, "default": {"kl": 0.015876974910497665, "cur_lr": 4.999999873689376e-05, "entropy": 16.05762481689453, "total_loss": 195.14218139648438, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13380476832389832, "vf_explained_var": 0.8996444344520569, "vf_loss": 195.25990295410156}, "grad_time_ms": 738.229}, "pid": 3934253, "time_total_s": 14041.634573221207, "episode_reward_mean": -165.66463873140276, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -210.5466717526865, "policy_reward_mean": {}, "episodes_total": 2760, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -151.9062574171948, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_18-30-33", "training_iteration": 115, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756485033, "episode_len_mean": 50.0, "timesteps_since_restore": 138000, "time_since_restore": 14041.634573221207, "time_this_iter_s": 99.51780271530151, "iterations_since_restore": 115}
+{"timesteps_total": 139200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 108473.617, "num_steps_sampled": 139200, "update_time_ms": 2.427, "num_steps_trained": 139200, "load_time_ms": 0.652, "default": {"kl": 0.014280934818089008, "cur_lr": 4.999999873689376e-05, "entropy": 15.890507698059082, "total_loss": 205.2573699951172, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12453499436378479, "vf_explained_var": 0.874573826789856, "vf_loss": 205.367431640625}, "grad_time_ms": 727.677}, "pid": 3934253, "time_total_s": 14144.893615484238, "episode_reward_mean": -164.95825059903262, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -223.89982514164038, "policy_reward_mean": {}, "episodes_total": 2784, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -145.5091252897312, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_18-32-17", "training_iteration": 116, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756485137, "episode_len_mean": 50.0, "timesteps_since_restore": 139200, "time_since_restore": 14144.893615484238, "time_this_iter_s": 103.259042263031, "iterations_since_restore": 116}
+{"timesteps_total": 140400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 106538.732, "num_steps_sampled": 140400, "update_time_ms": 2.408, "num_steps_trained": 140400, "load_time_ms": 0.652, "default": {"kl": 0.015535826794803143, "cur_lr": 4.999999873689376e-05, "entropy": 15.938193321228027, "total_loss": 172.31068420410156, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1428217738866806, "vf_explained_var": 0.8901649117469788, "vf_loss": 172.43780517578125}, "grad_time_ms": 733.555}, "pid": 3934253, "time_total_s": 14246.504431962967, "episode_reward_mean": -165.22754313461462, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -223.89982514164038, "policy_reward_mean": {}, "episodes_total": 2808, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.7293238662343, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_18-33-58", "training_iteration": 117, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756485238, "episode_len_mean": 50.0, "timesteps_since_restore": 140400, "time_since_restore": 14246.504431962967, "time_this_iter_s": 101.61081647872925, "iterations_since_restore": 117}
+{"timesteps_total": 141600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 109051.889, "num_steps_sampled": 141600, "update_time_ms": 2.398, "num_steps_trained": 141600, "load_time_ms": 0.679, "default": {"kl": 0.015633488073945045, "cur_lr": 4.999999873689376e-05, "entropy": 15.816776275634766, "total_loss": 69.72467803955078, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14318110048770905, "vf_explained_var": 0.9485836029052734, "vf_loss": 69.85203552246094}, "grad_time_ms": 735.071}, "pid": 3934253, "time_total_s": 14367.609112024307, "episode_reward_mean": -164.35507615599417, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -223.89982514164038, "policy_reward_mean": {}, "episodes_total": 2832, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.7293238662343, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_18-35-59", "training_iteration": 118, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756485359, "episode_len_mean": 50.0, "timesteps_since_restore": 141600, "time_since_restore": 14367.609112024307, "time_this_iter_s": 121.10468006134033, "iterations_since_restore": 118}
+{"timesteps_total": 142800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 104404.834, "num_steps_sampled": 142800, "update_time_ms": 2.474, "num_steps_trained": 142800, "load_time_ms": 0.681, "default": {"kl": 0.016464034095406532, "cur_lr": 4.999999873689376e-05, "entropy": 15.754087448120117, "total_loss": 113.65615844726562, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13386313617229462, "vf_explained_var": 0.9267792105674744, "vf_loss": 113.77334594726562}, "grad_time_ms": 744.731}, "pid": 3934253, "time_total_s": 14464.442219495773, "episode_reward_mean": -163.03480213112596, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -223.89982514164038, "policy_reward_mean": {}, "episodes_total": 2856, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.7293238662343, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_18-37-36", "training_iteration": 119, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756485456, "episode_len_mean": 50.0, "timesteps_since_restore": 142800, "time_since_restore": 14464.442219495773, "time_this_iter_s": 96.83310747146606, "iterations_since_restore": 119}
+{"timesteps_total": 144000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 106612.481, "num_steps_sampled": 144000, "update_time_ms": 2.491, "num_steps_trained": 144000, "load_time_ms": 0.717, "default": {"kl": 0.016114315018057823, "cur_lr": 4.999999873689376e-05, "entropy": 15.789478302001953, "total_loss": 94.16368865966797, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1432938277721405, "vf_explained_var": 0.9434927701950073, "vf_loss": 94.2906723022461}, "grad_time_ms": 744.017}, "pid": 3934253, "time_total_s": 14581.834088563919, "episode_reward_mean": -162.69901184530545, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -227.78725353717078, "policy_reward_mean": {}, "episodes_total": 2880, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.7293238662343, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_18-39-34", "training_iteration": 120, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756485574, "episode_len_mean": 50.0, "timesteps_since_restore": 144000, "time_since_restore": 14581.834088563919, "time_this_iter_s": 117.39186906814575, "iterations_since_restore": 120}
+{"timesteps_total": 145200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 108981.101, "num_steps_sampled": 145200, "update_time_ms": 2.477, "num_steps_trained": 145200, "load_time_ms": 0.725, "default": {"kl": 0.015237444080412388, "cur_lr": 4.999999873689376e-05, "entropy": 15.758185386657715, "total_loss": 102.03178405761719, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13658057153224945, "vf_explained_var": 0.9302859902381897, "vf_loss": 102.15293884277344}, "grad_time_ms": 741.072}, "pid": 3934253, "time_total_s": 14691.300345897675, "episode_reward_mean": -162.34126236260016, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -227.78725353717078, "policy_reward_mean": {}, "episodes_total": 2904, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.7293238662343, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_18-41-23", "training_iteration": 121, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756485683, "episode_len_mean": 50.0, "timesteps_since_restore": 145200, "time_since_restore": 14691.300345897675, "time_this_iter_s": 109.4662573337555, "iterations_since_restore": 121}
+{"timesteps_total": 146400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 107680.629, "num_steps_sampled": 146400, "update_time_ms": 2.47, "num_steps_trained": 146400, "load_time_ms": 0.712, "default": {"kl": 0.015177453868091106, "cur_lr": 4.999999873689376e-05, "entropy": 15.74573802947998, "total_loss": 118.48878479003906, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13045638799667358, "vf_explained_var": 0.9191161394119263, "vf_loss": 118.60386657714844}, "grad_time_ms": 752.242}, "pid": 3934253, "time_total_s": 14789.470313310623, "episode_reward_mean": -162.87203130228417, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -227.78725353717078, "policy_reward_mean": {}, "episodes_total": 2928, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -152.73308602597515, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_18-43-01", "training_iteration": 122, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756485781, "episode_len_mean": 50.0, "timesteps_since_restore": 146400, "time_since_restore": 14789.470313310623, "time_this_iter_s": 98.16996741294861, "iterations_since_restore": 122}
+{"timesteps_total": 147600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 106273.709, "num_steps_sampled": 147600, "update_time_ms": 2.451, "num_steps_trained": 147600, "load_time_ms": 0.704, "default": {"kl": 0.0166685301810503, "cur_lr": 4.999999873689376e-05, "entropy": 15.755717277526855, "total_loss": 85.09651947021484, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13483008742332458, "vf_explained_var": 0.94509357213974, "vf_loss": 85.21446228027344}, "grad_time_ms": 757.57}, "pid": 3934253, "time_total_s": 14898.709458351135, "episode_reward_mean": -162.7500207409775, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -227.78725353717078, "policy_reward_mean": {}, "episodes_total": 2952, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -152.3745728662264, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_18-44-51", "training_iteration": 123, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756485891, "episode_len_mean": 50.0, "timesteps_since_restore": 147600, "time_since_restore": 14898.709458351135, "time_this_iter_s": 109.23914504051208, "iterations_since_restore": 123}
+{"timesteps_total": 148800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 105721.462, "num_steps_sampled": 148800, "update_time_ms": 2.425, "num_steps_trained": 148800, "load_time_ms": 0.702, "default": {"kl": 0.016147322952747345, "cur_lr": 4.999999873689376e-05, "entropy": 15.723305702209473, "total_loss": 119.27034759521484, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13226349651813507, "vf_explained_var": 0.9263350963592529, "vf_loss": 119.38626098632812}, "grad_time_ms": 760.658}, "pid": 3934253, "time_total_s": 15007.022426128387, "episode_reward_mean": -162.13066795735972, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -222.88002538887568, "policy_reward_mean": {}, "episodes_total": 2976, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -152.3745728662264, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_18-46-39", "training_iteration": 124, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756485999, "episode_len_mean": 50.0, "timesteps_since_restore": 148800, "time_since_restore": 15007.022426128387, "time_this_iter_s": 108.3129677772522, "iterations_since_restore": 124}
+{"timesteps_total": 150000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 105027.21, "num_steps_sampled": 150000, "update_time_ms": 2.406, "num_steps_trained": 150000, "load_time_ms": 0.684, "default": {"kl": 0.016684727743268013, "cur_lr": 4.999999873689376e-05, "entropy": 15.590709686279297, "total_loss": 77.29227447509766, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.15790709853172302, "vf_explained_var": 0.9456202387809753, "vf_loss": 77.43329620361328}, "grad_time_ms": 739.797}, "pid": 3934253, "time_total_s": 15099.387891292572, "episode_reward_mean": -161.97012023780732, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -196.12841532848358, "policy_reward_mean": {}, "episodes_total": 3000, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -152.3745728662264, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_18-48-11", "training_iteration": 125, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756486091, "episode_len_mean": 50.0, "timesteps_since_restore": 150000, "time_since_restore": 15099.387891292572, "time_this_iter_s": 92.36546516418457, "iterations_since_restore": 125}
+{"timesteps_total": 151200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 104640.818, "num_steps_sampled": 151200, "update_time_ms": 2.413, "num_steps_trained": 151200, "load_time_ms": 0.675, "default": {"kl": 0.015469375997781754, "cur_lr": 4.999999873689376e-05, "entropy": 15.370908737182617, "total_loss": 56.76454162597656, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.134979248046875, "vf_explained_var": 0.9565190076828003, "vf_loss": 56.88386154174805}, "grad_time_ms": 736.857}, "pid": 3934253, "time_total_s": 15198.75416469574, "episode_reward_mean": -161.44737111172932, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -196.12841532848358, "policy_reward_mean": {}, "episodes_total": 3024, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -152.36681112874857, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_18-49-51", "training_iteration": 126, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756486191, "episode_len_mean": 50.0, "timesteps_since_restore": 151200, "time_since_restore": 15198.75416469574, "time_this_iter_s": 99.36627340316772, "iterations_since_restore": 126}
+{"timesteps_total": 152400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 105887.097, "num_steps_sampled": 152400, "update_time_ms": 2.475, "num_steps_trained": 152400, "load_time_ms": 0.681, "default": {"kl": 0.017822375521063805, "cur_lr": 4.999999873689376e-05, "entropy": 15.559758186340332, "total_loss": 93.37821197509766, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.15308107435703278, "vf_explained_var": 0.9373614192008972, "vf_loss": 93.51325225830078}, "grad_time_ms": 745.519}, "pid": 3934253, "time_total_s": 15312.915374994278, "episode_reward_mean": -160.98241869124263, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -196.12841532848358, "policy_reward_mean": {}, "episodes_total": 3048, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -152.04356348579236, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_18-51-45", "training_iteration": 127, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756486305, "episode_len_mean": 50.0, "timesteps_since_restore": 152400, "time_since_restore": 15312.915374994278, "time_this_iter_s": 114.16121029853821, "iterations_since_restore": 127}
+{"timesteps_total": 153600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 104508.826, "num_steps_sampled": 153600, "update_time_ms": 2.481, "num_steps_trained": 153600, "load_time_ms": 0.652, "default": {"kl": 0.01617765799164772, "cur_lr": 4.999999873689376e-05, "entropy": 15.50704574584961, "total_loss": 83.95415496826172, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1417163461446762, "vf_explained_var": 0.9401677250862122, "vf_loss": 84.07949829101562}, "grad_time_ms": 749.997}, "pid": 3934253, "time_total_s": 15420.28134059906, "episode_reward_mean": -160.89652670146586, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -196.12841532848358, "policy_reward_mean": {}, "episodes_total": 3072, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -149.96283505629324, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_18-53-32", "training_iteration": 128, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756486412, "episode_len_mean": 50.0, "timesteps_since_restore": 153600, "time_since_restore": 15420.28134059906, "time_this_iter_s": 107.3659656047821, "iterations_since_restore": 128}
+{"timesteps_total": 154800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 102807.243, "num_steps_sampled": 154800, "update_time_ms": 2.451, "num_steps_trained": 154800, "load_time_ms": 0.655, "default": {"kl": 0.012704680673778057, "cur_lr": 4.999999873689376e-05, "entropy": 15.458696365356445, "total_loss": 92.21479034423828, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1063886359333992, "vf_explained_var": 0.9457715153694153, "vf_loss": 92.30831909179688}, "grad_time_ms": 746.283}, "pid": 3934253, "time_total_s": 15500.06122136116, "episode_reward_mean": -160.04755913315933, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -196.3103197721101, "policy_reward_mean": {}, "episodes_total": 3096, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -149.96283505629324, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_18-54-52", "training_iteration": 129, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756486492, "episode_len_mean": 50.0, "timesteps_since_restore": 154800, "time_since_restore": 15500.06122136116, "time_this_iter_s": 79.77988076210022, "iterations_since_restore": 129}
+{"timesteps_total": 156000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 103082.504, "num_steps_sampled": 156000, "update_time_ms": 2.458, "num_steps_trained": 156000, "load_time_ms": 0.624, "default": {"kl": 0.015438392758369446, "cur_lr": 4.999999873689376e-05, "entropy": 15.547718048095703, "total_loss": 95.83563232421875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11330587416887283, "vf_explained_var": 0.9335554838180542, "vf_loss": 95.93331146240234}, "grad_time_ms": 745.026}, "pid": 3934253, "time_total_s": 15620.19240450859, "episode_reward_mean": -160.06733349064882, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -196.3103197721101, "policy_reward_mean": {}, "episodes_total": 3120, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -149.96283505629324, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_18-56-52", "training_iteration": 130, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756486612, "episode_len_mean": 50.0, "timesteps_since_restore": 156000, "time_since_restore": 15620.19240450859, "time_this_iter_s": 120.13118314743042, "iterations_since_restore": 130}
+{"timesteps_total": 157200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 102758.833, "num_steps_sampled": 157200, "update_time_ms": 2.466, "num_steps_trained": 157200, "load_time_ms": 0.615, "default": {"kl": 0.01456800103187561, "cur_lr": 4.999999873689376e-05, "entropy": 15.418561935424805, "total_loss": 153.66900634765625, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13157440721988678, "vf_explained_var": 0.8986132740974426, "vf_loss": 153.78582763671875}, "grad_time_ms": 742.747}, "pid": 3934253, "time_total_s": 15726.398941993713, "episode_reward_mean": -159.49411724190676, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -202.14320656776363, "policy_reward_mean": {}, "episodes_total": 3144, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -149.96283505629324, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_18-58-38", "training_iteration": 131, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756486718, "episode_len_mean": 50.0, "timesteps_since_restore": 157200, "time_since_restore": 15726.398941993713, "time_this_iter_s": 106.20653748512268, "iterations_since_restore": 131}
+{"timesteps_total": 158400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 103739.538, "num_steps_sampled": 158400, "update_time_ms": 2.501, "num_steps_trained": 158400, "load_time_ms": 0.614, "default": {"kl": 0.016706252470612526, "cur_lr": 4.999999873689376e-05, "entropy": 15.317606925964355, "total_loss": 98.88345336914062, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1393449604511261, "vf_explained_var": 0.933607816696167, "vf_loss": 99.0058822631836}, "grad_time_ms": 710.562}, "pid": 3934253, "time_total_s": 15834.054826974869, "episode_reward_mean": -159.85676489001088, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -202.14320656776363, "policy_reward_mean": {}, "episodes_total": 3168, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -150.4201484124871, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_19-00-26", "training_iteration": 132, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756486826, "episode_len_mean": 50.0, "timesteps_since_restore": 158400, "time_since_restore": 15834.054826974869, "time_this_iter_s": 107.6558849811554, "iterations_since_restore": 132}
+{"timesteps_total": 159600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 103062.235, "num_steps_sampled": 159600, "update_time_ms": 2.536, "num_steps_trained": 159600, "load_time_ms": 0.624, "default": {"kl": 0.016824984923005104, "cur_lr": 4.999999873689376e-05, "entropy": 15.423134803771973, "total_loss": 95.9524917602539, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12300290167331696, "vf_explained_var": 0.9352494478225708, "vf_loss": 96.05846405029297}, "grad_time_ms": 700.816}, "pid": 3934253, "time_total_s": 15936.423606872559, "episode_reward_mean": -159.00856716484094, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -202.14320656776363, "policy_reward_mean": {}, "episodes_total": 3192, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -147.035794824748, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_19-02-08", "training_iteration": 133, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756486928, "episode_len_mean": 50.0, "timesteps_since_restore": 159600, "time_since_restore": 15936.423606872559, "time_this_iter_s": 102.36877989768982, "iterations_since_restore": 133}
+{"timesteps_total": 160800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 104477.854, "num_steps_sampled": 160800, "update_time_ms": 2.555, "num_steps_trained": 160800, "load_time_ms": 0.651, "default": {"kl": 0.01582499034702778, "cur_lr": 4.999999873689376e-05, "entropy": 15.176227569580078, "total_loss": 150.65570068359375, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14522379636764526, "vf_explained_var": 0.9030457735061646, "vf_loss": 150.78488159179688}, "grad_time_ms": 673.787}, "pid": 3934253, "time_total_s": 16058.623097419739, "episode_reward_mean": -159.1646082147905, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -202.14320656776363, "policy_reward_mean": {}, "episodes_total": 3216, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -147.035794824748, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_19-04-11", "training_iteration": 134, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756487051, "episode_len_mean": 50.0, "timesteps_since_restore": 160800, "time_since_restore": 16058.623097419739, "time_this_iter_s": 122.19949054718018, "iterations_since_restore": 134}
+{"timesteps_total": 162000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 106125.728, "num_steps_sampled": 162000, "update_time_ms": 2.586, "num_steps_trained": 162000, "load_time_ms": 0.656, "default": {"kl": 0.01506539061665535, "cur_lr": 4.999999873689376e-05, "entropy": 15.253995895385742, "total_loss": 106.41146850585938, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11677607893943787, "vf_explained_var": 0.9423614144325256, "vf_loss": 106.51298522949219}, "grad_time_ms": 696.908}, "pid": 3934253, "time_total_s": 16167.699571847916, "episode_reward_mean": -158.73587543872088, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -193.33495906545753, "policy_reward_mean": {}, "episodes_total": 3240, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -147.035794824748, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_19-06-00", "training_iteration": 135, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756487160, "episode_len_mean": 50.0, "timesteps_since_restore": 162000, "time_since_restore": 16167.699571847916, "time_this_iter_s": 109.07647442817688, "iterations_since_restore": 135}
+{"timesteps_total": 163200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 106775.9, "num_steps_sampled": 163200, "update_time_ms": 2.556, "num_steps_trained": 163200, "load_time_ms": 0.653, "default": {"kl": 0.014284864068031311, "cur_lr": 4.999999873689376e-05, "entropy": 15.149747848510742, "total_loss": 78.15953063964844, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13005171716213226, "vf_explained_var": 0.9475562572479248, "vf_loss": 78.27511596679688}, "grad_time_ms": 702.728}, "pid": 3934253, "time_total_s": 16273.625362873077, "episode_reward_mean": -158.2200610019019, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -194.0735576508897, "policy_reward_mean": {}, "episodes_total": 3264, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -147.035794824748, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_19-07-46", "training_iteration": 136, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756487266, "episode_len_mean": 50.0, "timesteps_since_restore": 163200, "time_since_restore": 16273.625362873077, "time_this_iter_s": 105.92579102516174, "iterations_since_restore": 136}
+{"timesteps_total": 164400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 105439.688, "num_steps_sampled": 164400, "update_time_ms": 2.471, "num_steps_trained": 164400, "load_time_ms": 0.652, "default": {"kl": 0.015326268039643764, "cur_lr": 4.999999873689376e-05, "entropy": 15.038931846618652, "total_loss": 76.04287719726562, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1274718940258026, "vf_explained_var": 0.9446787238121033, "vf_loss": 76.15482330322266}, "grad_time_ms": 704.662}, "pid": 3934253, "time_total_s": 16374.442579507828, "episode_reward_mean": -158.5093182311461, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -194.0735576508897, "policy_reward_mean": {}, "episodes_total": 3288, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -151.32592374317068, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_19-09-26", "training_iteration": 137, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756487366, "episode_len_mean": 50.0, "timesteps_since_restore": 164400, "time_since_restore": 16374.442579507828, "time_this_iter_s": 100.81721663475037, "iterations_since_restore": 137}
+{"timesteps_total": 165600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 105544.13, "num_steps_sampled": 165600, "update_time_ms": 2.483, "num_steps_trained": 165600, "load_time_ms": 0.653, "default": {"kl": 0.01429035235196352, "cur_lr": 4.999999873689376e-05, "entropy": 15.274619102478027, "total_loss": 113.89822387695312, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.119236521422863, "vf_explained_var": 0.9221948981285095, "vf_loss": 114.00298309326172}, "grad_time_ms": 691.319}, "pid": 3934253, "time_total_s": 16482.721665859222, "episode_reward_mean": -158.44117571903706, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -195.68461275679073, "policy_reward_mean": {}, "episodes_total": 3312, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -151.32592374317068, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_19-11-15", "training_iteration": 138, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756487475, "episode_len_mean": 50.0, "timesteps_since_restore": 165600, "time_since_restore": 16482.721665859222, "time_this_iter_s": 108.27908635139465, "iterations_since_restore": 138}
+{"timesteps_total": 166800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 108306.367, "num_steps_sampled": 166800, "update_time_ms": 2.444, "num_steps_trained": 166800, "load_time_ms": 0.647, "default": {"kl": 0.015218976885080338, "cur_lr": 4.999999873689376e-05, "entropy": 15.024642944335938, "total_loss": 92.8395004272461, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12442895770072937, "vf_explained_var": 0.9352640509605408, "vf_loss": 92.94851684570312}, "grad_time_ms": 685.966}, "pid": 3934253, "time_total_s": 16590.070190668106, "episode_reward_mean": -158.71721453232985, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -195.68461275679073, "policy_reward_mean": {}, "episodes_total": 3336, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -146.55230270325862, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_19-13-02", "training_iteration": 139, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756487582, "episode_len_mean": 50.0, "timesteps_since_restore": 166800, "time_since_restore": 16590.070190668106, "time_this_iter_s": 107.34852480888367, "iterations_since_restore": 139}
+{"timesteps_total": 168000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 107962.214, "num_steps_sampled": 168000, "update_time_ms": 2.448, "num_steps_trained": 168000, "load_time_ms": 0.642, "default": {"kl": 0.012888466008007526, "cur_lr": 4.999999873689376e-05, "entropy": 15.229165077209473, "total_loss": 126.61551666259766, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11669489741325378, "vf_explained_var": 0.9207143783569336, "vf_loss": 126.71916961669922}, "grad_time_ms": 678.166}, "pid": 3934253, "time_total_s": 16706.68172430992, "episode_reward_mean": -159.03158914373972, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -209.50328456745822, "policy_reward_mean": {}, "episodes_total": 3360, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.81058536609197, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_19-14-59", "training_iteration": 140, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756487699, "episode_len_mean": 50.0, "timesteps_since_restore": 168000, "time_since_restore": 16706.68172430992, "time_this_iter_s": 116.61153364181519, "iterations_since_restore": 140}
+{"timesteps_total": 169200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 108109.572, "num_steps_sampled": 169200, "update_time_ms": 2.434, "num_steps_trained": 169200, "load_time_ms": 0.641, "default": {"kl": 0.014938879758119583, "cur_lr": 4.999999873689376e-05, "entropy": 15.037114143371582, "total_loss": 56.33360290527344, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12748420238494873, "vf_explained_var": 0.9595043659210205, "vf_loss": 56.44596862792969}, "grad_time_ms": 681.575}, "pid": 3934253, "time_total_s": 16814.394966363907, "episode_reward_mean": -158.5527541966109, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -209.50328456745822, "policy_reward_mean": {}, "episodes_total": 3384, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.81058536609197, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_19-16-46", "training_iteration": 141, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756487806, "episode_len_mean": 50.0, "timesteps_since_restore": 169200, "time_since_restore": 16814.394966363907, "time_this_iter_s": 107.7132420539856, "iterations_since_restore": 141}
+{"timesteps_total": 170400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 106676.728, "num_steps_sampled": 170400, "update_time_ms": 2.39, "num_steps_trained": 170400, "load_time_ms": 0.64, "default": {"kl": 0.015551741234958172, "cur_lr": 4.999999873689376e-05, "entropy": 15.132568359375, "total_loss": 95.65824890136719, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12652461230754852, "vf_explained_var": 0.9271260499954224, "vf_loss": 95.76902770996094}, "grad_time_ms": 703.401}, "pid": 3934253, "time_total_s": 16907.940058231354, "episode_reward_mean": -158.10055911942175, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -209.50328456745822, "policy_reward_mean": {}, "episodes_total": 3408, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.81058536609197, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_19-18-20", "training_iteration": 142, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756487900, "episode_len_mean": 50.0, "timesteps_since_restore": 170400, "time_since_restore": 16907.940058231354, "time_this_iter_s": 93.5450918674469, "iterations_since_restore": 142}
+{"timesteps_total": 171600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 106873.675, "num_steps_sampled": 171600, "update_time_ms": 2.316, "num_steps_trained": 171600, "load_time_ms": 0.638, "default": {"kl": 0.01617261953651905, "cur_lr": 4.999999873689376e-05, "entropy": 15.013951301574707, "total_loss": 65.23526000976562, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13598722219467163, "vf_explained_var": 0.9518048763275146, "vf_loss": 65.35486602783203}, "grad_time_ms": 719.478}, "pid": 3934253, "time_total_s": 17012.4382250309, "episode_reward_mean": -158.30730094810116, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -209.50328456745822, "policy_reward_mean": {}, "episodes_total": 3432, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.81058536609197, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_19-20-05", "training_iteration": 143, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756488005, "episode_len_mean": 50.0, "timesteps_since_restore": 171600, "time_since_restore": 17012.4382250309, "time_this_iter_s": 104.49816679954529, "iterations_since_restore": 143}
+{"timesteps_total": 172800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 106235.145, "num_steps_sampled": 172800, "update_time_ms": 2.349, "num_steps_trained": 172800, "load_time_ms": 0.622, "default": {"kl": 0.015435976907610893, "cur_lr": 4.999999873689376e-05, "entropy": 15.000235557556152, "total_loss": 62.51327896118164, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13944146037101746, "vf_explained_var": 0.9515180587768555, "vf_loss": 62.63710021972656}, "grad_time_ms": 750.126}, "pid": 3934253, "time_total_s": 17128.5585501194, "episode_reward_mean": -156.8989274949609, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -175.5255590819791, "policy_reward_mean": {}, "episodes_total": 3456, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -149.70738469206646, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_19-22-01", "training_iteration": 144, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756488121, "episode_len_mean": 50.0, "timesteps_since_restore": 172800, "time_since_restore": 17128.5585501194, "time_this_iter_s": 116.12032508850098, "iterations_since_restore": 144}
+{"timesteps_total": 174000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 105283.376, "num_steps_sampled": 174000, "update_time_ms": 2.302, "num_steps_trained": 174000, "load_time_ms": 0.619, "default": {"kl": 0.014956353232264519, "cur_lr": 4.999999873689376e-05, "entropy": 14.847784996032715, "total_loss": 104.35578918457031, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12986616790294647, "vf_explained_var": 0.9379231333732605, "vf_loss": 104.47049713134766}, "grad_time_ms": 733.99}, "pid": 3934253, "time_total_s": 17227.954606294632, "episode_reward_mean": -156.9433194148839, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -187.17942537200705, "policy_reward_mean": {}, "episodes_total": 3480, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -149.70738469206646, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_19-23-40", "training_iteration": 145, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756488220, "episode_len_mean": 50.0, "timesteps_since_restore": 174000, "time_since_restore": 17227.954606294632, "time_this_iter_s": 99.39605617523193, "iterations_since_restore": 145}
+{"timesteps_total": 175200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 105196.062, "num_steps_sampled": 175200, "update_time_ms": 2.314, "num_steps_trained": 175200, "load_time_ms": 0.621, "default": {"kl": 0.013884143903851509, "cur_lr": 4.999999873689376e-05, "entropy": 14.91396713256836, "total_loss": 66.7122802734375, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13310521841049194, "vf_explained_var": 0.9542436599731445, "vf_loss": 66.83132934570312}, "grad_time_ms": 707.662}, "pid": 3934253, "time_total_s": 17332.743657827377, "episode_reward_mean": -157.06993405255005, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -202.21004607666393, "policy_reward_mean": {}, "episodes_total": 3504, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -149.70738469206646, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_19-25-25", "training_iteration": 146, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756488325, "episode_len_mean": 50.0, "timesteps_since_restore": 175200, "time_since_restore": 17332.743657827377, "time_this_iter_s": 104.78905153274536, "iterations_since_restore": 146}
+{"timesteps_total": 176400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 105773.671, "num_steps_sampled": 176400, "update_time_ms": 2.328, "num_steps_trained": 176400, "load_time_ms": 0.616, "default": {"kl": 0.015176494605839252, "cur_lr": 4.999999873689376e-05, "entropy": 15.008373260498047, "total_loss": 79.77460479736328, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11993683874607086, "vf_explained_var": 0.9480642676353455, "vf_loss": 79.87918090820312}, "grad_time_ms": 704.527}, "pid": 3934253, "time_total_s": 17439.30501151085, "episode_reward_mean": -157.1401521045944, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -202.21004607666393, "policy_reward_mean": {}, "episodes_total": 3528, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -147.43059014043487, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_19-27-11", "training_iteration": 147, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756488431, "episode_len_mean": 50.0, "timesteps_since_restore": 176400, "time_since_restore": 17439.30501151085, "time_this_iter_s": 106.56135368347168, "iterations_since_restore": 147}
+{"timesteps_total": 177600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 102626.23, "num_steps_sampled": 177600, "update_time_ms": 2.288, "num_steps_trained": 177600, "load_time_ms": 0.614, "default": {"kl": 0.016340788453817368, "cur_lr": 4.999999873689376e-05, "entropy": 14.866175651550293, "total_loss": 50.99203872680664, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.127852201461792, "vf_explained_var": 0.9572170972824097, "vf_loss": 51.10334777832031}, "grad_time_ms": 713.616}, "pid": 3934253, "time_total_s": 17516.197714090347, "episode_reward_mean": -156.9326692679125, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -202.21004607666393, "policy_reward_mean": {}, "episodes_total": 3552, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -145.1666515668931, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_19-28-28", "training_iteration": 148, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756488508, "episode_len_mean": 50.0, "timesteps_since_restore": 177600, "time_since_restore": 17516.197714090347, "time_this_iter_s": 76.89270257949829, "iterations_since_restore": 148}
+{"timesteps_total": 178800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 102912.524, "num_steps_sampled": 178800, "update_time_ms": 2.288, "num_steps_trained": 178800, "load_time_ms": 0.611, "default": {"kl": 0.015348482877016068, "cur_lr": 4.999999873689376e-05, "entropy": 14.877336502075195, "total_loss": 92.67220306396484, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.15013960003852844, "vf_explained_var": 0.9410419464111328, "vf_loss": 92.80680847167969}, "grad_time_ms": 722.665}, "pid": 3934253, "time_total_s": 17626.49950647354, "episode_reward_mean": -158.18542591613408, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -232.173069817677, "policy_reward_mean": {}, "episodes_total": 3576, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -145.1666515668931, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_19-30-19", "training_iteration": 149, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756488619, "episode_len_mean": 50.0, "timesteps_since_restore": 178800, "time_since_restore": 17626.49950647354, "time_this_iter_s": 110.30179238319397, "iterations_since_restore": 149}
+{"timesteps_total": 180000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 103219.799, "num_steps_sampled": 180000, "update_time_ms": 2.304, "num_steps_trained": 180000, "load_time_ms": 0.611, "default": {"kl": 0.014295445755124092, "cur_lr": 4.999999873689376e-05, "entropy": 14.858844757080078, "total_loss": 46.206031799316406, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11927267909049988, "vf_explained_var": 0.9663113355636597, "vf_loss": 46.310829162597656}, "grad_time_ms": 728.864}, "pid": 3934253, "time_total_s": 17746.24654841423, "episode_reward_mean": -158.00782030045582, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -232.173069817677, "policy_reward_mean": {}, "episodes_total": 3600, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -145.1666515668931, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_19-32-18", "training_iteration": 150, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756488738, "episode_len_mean": 50.0, "timesteps_since_restore": 180000, "time_since_restore": 17746.24654841423, "time_this_iter_s": 119.74704194068909, "iterations_since_restore": 150}
+{"timesteps_total": 181200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 103434.054, "num_steps_sampled": 181200, "update_time_ms": 2.36, "num_steps_trained": 181200, "load_time_ms": 0.615, "default": {"kl": 0.015794552862644196, "cur_lr": 4.999999873689376e-05, "entropy": 14.848892211914062, "total_loss": 82.28297424316406, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12699836492538452, "vf_explained_var": 0.9383652806282043, "vf_loss": 82.39397430419922}, "grad_time_ms": 718.235}, "pid": 3934253, "time_total_s": 17855.997240543365, "episode_reward_mean": -157.82890956270467, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -232.173069817677, "policy_reward_mean": {}, "episodes_total": 3624, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -145.1666515668931, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_19-34-08", "training_iteration": 151, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756488848, "episode_len_mean": 50.0, "timesteps_since_restore": 181200, "time_since_restore": 17855.997240543365, "time_this_iter_s": 109.75069212913513, "iterations_since_restore": 151}
+{"timesteps_total": 182400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 106840.299, "num_steps_sampled": 182400, "update_time_ms": 2.391, "num_steps_trained": 182400, "load_time_ms": 0.621, "default": {"kl": 0.01783747598528862, "cur_lr": 4.999999873689376e-05, "entropy": 14.777881622314453, "total_loss": 92.23494720458984, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1375618278980255, "vf_explained_var": 0.9388156533241272, "vf_loss": 92.35443878173828}, "grad_time_ms": 703.231}, "pid": 3934253, "time_total_s": 17983.454869747162, "episode_reward_mean": -157.61030282202955, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -232.173069817677, "policy_reward_mean": {}, "episodes_total": 3648, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -148.19687584877354, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_19-36-16", "training_iteration": 152, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756488976, "episode_len_mean": 50.0, "timesteps_since_restore": 182400, "time_since_restore": 17983.454869747162, "time_this_iter_s": 127.45762920379639, "iterations_since_restore": 152}
+{"timesteps_total": 183600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 107353.866, "num_steps_sampled": 183600, "update_time_ms": 2.486, "num_steps_trained": 183600, "load_time_ms": 0.649, "default": {"kl": 0.014833658933639526, "cur_lr": 4.999999873689376e-05, "entropy": 14.644444465637207, "total_loss": 77.48524475097656, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12767963111400604, "vf_explained_var": 0.9482372999191284, "vf_loss": 77.597900390625}, "grad_time_ms": 670.73}, "pid": 3934253, "time_total_s": 18092.76464152336, "episode_reward_mean": -156.80859157196807, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -232.173069817677, "policy_reward_mean": {}, "episodes_total": 3672, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -147.6635856393042, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_19-38-05", "training_iteration": 153, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756489085, "episode_len_mean": 50.0, "timesteps_since_restore": 183600, "time_since_restore": 18092.76464152336, "time_this_iter_s": 109.30977177619934, "iterations_since_restore": 153}
+{"timesteps_total": 184800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 106945.08, "num_steps_sampled": 184800, "update_time_ms": 2.504, "num_steps_trained": 184800, "load_time_ms": 0.626, "default": {"kl": 0.014095836319029331, "cur_lr": 4.999999873689376e-05, "entropy": 14.716404914855957, "total_loss": 50.62611770629883, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11327210813760757, "vf_explained_var": 0.9626729488372803, "vf_loss": 50.72511672973633}, "grad_time_ms": 643.517}, "pid": 3934253, "time_total_s": 18204.524663448334, "episode_reward_mean": -155.84383666926186, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -187.26780230902494, "policy_reward_mean": {}, "episodes_total": 3696, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -147.6635856393042, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_19-39-57", "training_iteration": 154, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756489197, "episode_len_mean": 50.0, "timesteps_since_restore": 184800, "time_since_restore": 18204.524663448334, "time_this_iter_s": 111.76002192497253, "iterations_since_restore": 154}
+{"timesteps_total": 186000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 106655.26, "num_steps_sampled": 186000, "update_time_ms": 2.491, "num_steps_trained": 186000, "load_time_ms": 0.629, "default": {"kl": 0.014999334700405598, "cur_lr": 4.999999873689376e-05, "entropy": 14.71993350982666, "total_loss": 34.203369140625, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1288701295852661, "vf_explained_var": 0.9726163148880005, "vf_loss": 34.31705093383789}, "grad_time_ms": 661.285}, "pid": 3934253, "time_total_s": 18301.200717687607, "episode_reward_mean": -155.7455358243003, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -187.26780230902494, "policy_reward_mean": {}, "episodes_total": 3720, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -147.6635856393042, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_19-41-33", "training_iteration": 155, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756489293, "episode_len_mean": 50.0, "timesteps_since_restore": 186000, "time_since_restore": 18301.200717687607, "time_this_iter_s": 96.67605423927307, "iterations_since_restore": 155}
+{"timesteps_total": 187200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 106523.155, "num_steps_sampled": 187200, "update_time_ms": 2.514, "num_steps_trained": 187200, "load_time_ms": 0.636, "default": {"kl": 0.01529185101389885, "cur_lr": 4.999999873689376e-05, "entropy": 14.741146087646484, "total_loss": 63.08943557739258, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13563477993011475, "vf_explained_var": 0.9613681435585022, "vf_loss": 63.20958709716797}, "grad_time_ms": 687.403}, "pid": 3934253, "time_total_s": 18404.930746793747, "episode_reward_mean": -156.05749131747933, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -187.26780230902494, "policy_reward_mean": {}, "episodes_total": 3744, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -147.6635856393042, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_19-43-17", "training_iteration": 156, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756489397, "episode_len_mean": 50.0, "timesteps_since_restore": 187200, "time_since_restore": 18404.930746793747, "time_this_iter_s": 103.73002910614014, "iterations_since_restore": 156}
+{"timesteps_total": 188400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 106234.606, "num_steps_sampled": 188400, "update_time_ms": 2.53, "num_steps_trained": 188400, "load_time_ms": 0.638, "default": {"kl": 0.01576061360538006, "cur_lr": 4.999999873689376e-05, "entropy": 14.539962768554688, "total_loss": 51.49734878540039, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12901757657527924, "vf_explained_var": 0.9635226726531982, "vf_loss": 51.61040496826172}, "grad_time_ms": 680.89}, "pid": 3934253, "time_total_s": 18508.54259133339, "episode_reward_mean": -156.18697868414674, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -187.26780230902494, "policy_reward_mean": {}, "episodes_total": 3768, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -149.05326009298292, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_19-45-01", "training_iteration": 157, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756489501, "episode_len_mean": 50.0, "timesteps_since_restore": 188400, "time_since_restore": 18508.54259133339, "time_this_iter_s": 103.61184453964233, "iterations_since_restore": 157}
+{"timesteps_total": 189600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 110393.008, "num_steps_sampled": 189600, "update_time_ms": 2.544, "num_steps_trained": 189600, "load_time_ms": 0.639, "default": {"kl": 0.015366671606898308, "cur_lr": 4.999999873689376e-05, "entropy": 14.503620147705078, "total_loss": 44.8004264831543, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1365458071231842, "vf_explained_var": 0.96863853931427, "vf_loss": 44.921409606933594}, "grad_time_ms": 683.531}, "pid": 3934253, "time_total_s": 18627.046046733856, "episode_reward_mean": -156.31557634699521, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -170.84411173980249, "policy_reward_mean": {}, "episodes_total": 3792, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -150.2876891507201, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_19-46-59", "training_iteration": 158, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756489619, "episode_len_mean": 50.0, "timesteps_since_restore": 189600, "time_since_restore": 18627.046046733856, "time_this_iter_s": 118.50345540046692, "iterations_since_restore": 158}
+{"timesteps_total": 190800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 111784.543, "num_steps_sampled": 190800, "update_time_ms": 2.52, "num_steps_trained": 190800, "load_time_ms": 0.64, "default": {"kl": 0.015097062103450298, "cur_lr": 4.999999873689376e-05, "entropy": 14.573460578918457, "total_loss": 45.253807067871094, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12723152339458466, "vf_explained_var": 0.9695051908493042, "vf_loss": 45.36575698852539}, "grad_time_ms": 680.895}, "pid": 3934253, "time_total_s": 18751.23653268814, "episode_reward_mean": -156.06897775264233, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -170.84411173980249, "policy_reward_mean": {}, "episodes_total": 3816, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.36662116168563, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_19-49-04", "training_iteration": 159, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756489744, "episode_len_mean": 50.0, "timesteps_since_restore": 190800, "time_since_restore": 18751.23653268814, "time_this_iter_s": 124.19048595428467, "iterations_since_restore": 159}
+{"timesteps_total": 192000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 110586.829, "num_steps_sampled": 192000, "update_time_ms": 2.531, "num_steps_trained": 192000, "load_time_ms": 0.642, "default": {"kl": 0.015620950609445572, "cur_lr": 4.999999873689376e-05, "entropy": 14.42264175415039, "total_loss": 44.00412368774414, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12604941427707672, "vf_explained_var": 0.9683871865272522, "vf_loss": 44.11436462402344}, "grad_time_ms": 669.18}, "pid": 3934253, "time_total_s": 18858.888377189636, "episode_reward_mean": -155.75848397267933, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -168.91110461407595, "policy_reward_mean": {}, "episodes_total": 3840, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.36662116168563, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_19-50-51", "training_iteration": 160, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756489851, "episode_len_mean": 50.0, "timesteps_since_restore": 192000, "time_since_restore": 18858.888377189636, "time_this_iter_s": 107.65184450149536, "iterations_since_restore": 160}
+{"timesteps_total": 193200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 111276.767, "num_steps_sampled": 193200, "update_time_ms": 2.516, "num_steps_trained": 193200, "load_time_ms": 0.647, "default": {"kl": 0.014775075949728489, "cur_lr": 4.999999873689376e-05, "entropy": 14.54749870300293, "total_loss": 58.11050033569336, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1309185028076172, "vf_explained_var": 0.9599100947380066, "vf_loss": 58.22645950317383}, "grad_time_ms": 689.36}, "pid": 3934253, "time_total_s": 18975.739804506302, "episode_reward_mean": -155.97364649677118, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -192.82730108260392, "policy_reward_mean": {}, "episodes_total": 3864, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.36662116168563, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_19-52-48", "training_iteration": 161, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756489968, "episode_len_mean": 50.0, "timesteps_since_restore": 193200, "time_since_restore": 18975.739804506302, "time_this_iter_s": 116.85142731666565, "iterations_since_restore": 161}
+{"timesteps_total": 194400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 107563.772, "num_steps_sampled": 194400, "update_time_ms": 2.474, "num_steps_trained": 194400, "load_time_ms": 0.641, "default": {"kl": 0.015671495348215103, "cur_lr": 4.999999873689376e-05, "entropy": 14.508405685424805, "total_loss": 40.06678009033203, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12871819734573364, "vf_explained_var": 0.9688021540641785, "vf_loss": 40.179630279541016}, "grad_time_ms": 698.056}, "pid": 3934253, "time_total_s": 19066.154118299484, "episode_reward_mean": -155.86087650370283, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -192.82730108260392, "policy_reward_mean": {}, "episodes_total": 3888, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.36662116168563, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_19-54-19", "training_iteration": 162, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756490059, "episode_len_mean": 50.0, "timesteps_since_restore": 194400, "time_since_restore": 19066.154118299484, "time_this_iter_s": 90.41431379318237, "iterations_since_restore": 162}
+{"timesteps_total": 195600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 107835.158, "num_steps_sampled": 195600, "update_time_ms": 2.403, "num_steps_trained": 195600, "load_time_ms": 0.609, "default": {"kl": 0.01576964743435383, "cur_lr": 4.999999873689376e-05, "entropy": 14.222159385681152, "total_loss": 38.615726470947266, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12422147393226624, "vf_explained_var": 0.972466766834259, "vf_loss": 38.72397994995117}, "grad_time_ms": 734.539}, "pid": 3934253, "time_total_s": 19178.542206048965, "episode_reward_mean": -155.77523854605596, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -192.82730108260392, "policy_reward_mean": {}, "episodes_total": 3912, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -150.75634943073578, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_19-56-11", "training_iteration": 163, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756490171, "episode_len_mean": 50.0, "timesteps_since_restore": 195600, "time_since_restore": 19178.542206048965, "time_this_iter_s": 112.3880877494812, "iterations_since_restore": 163}
+{"timesteps_total": 196800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 105948.854, "num_steps_sampled": 196800, "update_time_ms": 2.353, "num_steps_trained": 196800, "load_time_ms": 0.613, "default": {"kl": 0.01632661558687687, "cur_lr": 4.999999873689376e-05, "entropy": 14.407248497009277, "total_loss": 66.69595336914062, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1468474119901657, "vf_explained_var": 0.9592094421386719, "vf_loss": 66.82626342773438}, "grad_time_ms": 758.658}, "pid": 3934253, "time_total_s": 19271.680288791656, "episode_reward_mean": -155.52057609509816, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -192.82730108260392, "policy_reward_mean": {}, "episodes_total": 3936, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -150.59467953216102, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_19-57-44", "training_iteration": 164, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756490264, "episode_len_mean": 50.0, "timesteps_since_restore": 196800, "time_since_restore": 19271.680288791656, "time_this_iter_s": 93.13808274269104, "iterations_since_restore": 164}
+{"timesteps_total": 198000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 106547.882, "num_steps_sampled": 198000, "update_time_ms": 2.368, "num_steps_trained": 198000, "load_time_ms": 0.612, "default": {"kl": 0.014598803594708443, "cur_lr": 4.999999873689376e-05, "entropy": 14.533857345581055, "total_loss": 43.06224060058594, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12842096388339996, "vf_explained_var": 0.9679848551750183, "vf_loss": 43.17587661743164}, "grad_time_ms": 757.885}, "pid": 3934253, "time_total_s": 19374.33864402771, "episode_reward_mean": -155.1683275163884, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -186.20441058789976, "policy_reward_mean": {}, "episodes_total": 3960, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -149.0035912566383, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_19-59-27", "training_iteration": 165, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756490367, "episode_len_mean": 50.0, "timesteps_since_restore": 198000, "time_since_restore": 19374.33864402771, "time_this_iter_s": 102.65835523605347, "iterations_since_restore": 165}
+{"timesteps_total": 199200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 105139.279, "num_steps_sampled": 199200, "update_time_ms": 2.332, "num_steps_trained": 199200, "load_time_ms": 0.608, "default": {"kl": 0.014788919128477573, "cur_lr": 4.999999873689376e-05, "entropy": 14.377288818359375, "total_loss": 48.58959197998047, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12106100469827652, "vf_explained_var": 0.9630370736122131, "vf_loss": 48.6956787109375}, "grad_time_ms": 758.957}, "pid": 3934253, "time_total_s": 19463.992821216583, "episode_reward_mean": -154.9477786673612, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -186.20441058789976, "policy_reward_mean": {}, "episodes_total": 3984, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.68382772036009, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_20-00-56", "training_iteration": 166, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756490456, "episode_len_mean": 50.0, "timesteps_since_restore": 199200, "time_since_restore": 19463.992821216583, "time_this_iter_s": 89.65417718887329, "iterations_since_restore": 166}
+{"timesteps_total": 200400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 107294.494, "num_steps_sampled": 200400, "update_time_ms": 2.31, "num_steps_trained": 200400, "load_time_ms": 0.607, "default": {"kl": 0.015912381932139397, "cur_lr": 4.999999873689376e-05, "entropy": 14.272615432739258, "total_loss": 42.7900390625, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13069111108779907, "vf_explained_var": 0.9659023284912109, "vf_loss": 42.90461730957031}, "grad_time_ms": 754.259}, "pid": 3934253, "time_total_s": 19589.108632087708, "episode_reward_mean": -155.09633164691525, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -186.20441058789976, "policy_reward_mean": {}, "episodes_total": 4008, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.68382772036009, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_20-03-02", "training_iteration": 167, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756490582, "episode_len_mean": 50.0, "timesteps_since_restore": 200400, "time_since_restore": 19589.108632087708, "time_this_iter_s": 125.11581087112427, "iterations_since_restore": 167}
+{"timesteps_total": 201600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 105697.83, "num_steps_sampled": 201600, "update_time_ms": 2.295, "num_steps_trained": 201600, "load_time_ms": 0.607, "default": {"kl": 0.014985193498432636, "cur_lr": 4.999999873689376e-05, "entropy": 14.434755325317383, "total_loss": 35.87843322753906, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13099879026412964, "vf_explained_var": 0.9744190573692322, "vf_loss": 35.994258880615234}, "grad_time_ms": 757.804}, "pid": 3934253, "time_total_s": 19691.680990934372, "episode_reward_mean": -155.6347589901296, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -186.20441058789976, "policy_reward_mean": {}, "episodes_total": 4032, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -138.59291754226575, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_20-04-44", "training_iteration": 168, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756490684, "episode_len_mean": 50.0, "timesteps_since_restore": 201600, "time_since_restore": 19691.680990934372, "time_this_iter_s": 102.57235884666443, "iterations_since_restore": 168}
+{"timesteps_total": 202800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 102117.547, "num_steps_sampled": 202800, "update_time_ms": 2.31, "num_steps_trained": 202800, "load_time_ms": 0.623, "default": {"kl": 0.015808604657649994, "cur_lr": 4.999999873689376e-05, "entropy": 14.588302612304688, "total_loss": 55.970699310302734, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13252593576908112, "vf_explained_var": 0.9560667872428894, "vf_loss": 56.08721923828125}, "grad_time_ms": 754.829}, "pid": 3934253, "time_total_s": 19780.03944683075, "episode_reward_mean": -155.08221493769696, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -169.66817220868816, "policy_reward_mean": {}, "episodes_total": 4056, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -138.59291754226575, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_20-06-13", "training_iteration": 169, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756490773, "episode_len_mean": 50.0, "timesteps_since_restore": 202800, "time_since_restore": 19780.03944683075, "time_this_iter_s": 88.35845589637756, "iterations_since_restore": 169}
+{"timesteps_total": 204000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 101115.095, "num_steps_sampled": 204000, "update_time_ms": 2.281, "num_steps_trained": 204000, "load_time_ms": 0.621, "default": {"kl": 0.01640515774488449, "cur_lr": 4.999999873689376e-05, "entropy": 14.44264030456543, "total_loss": 33.83430099487305, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12805338203907013, "vf_explained_var": 0.9746472835540771, "vf_loss": 33.94574737548828}, "grad_time_ms": 776.615}, "pid": 3934253, "time_total_s": 19877.884481191635, "episode_reward_mean": -155.26188435914753, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -169.66817220868816, "policy_reward_mean": {}, "episodes_total": 4080, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -138.59291754226575, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_20-07-50", "training_iteration": 170, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756490870, "episode_len_mean": 50.0, "timesteps_since_restore": 204000, "time_since_restore": 19877.884481191635, "time_this_iter_s": 97.84503436088562, "iterations_since_restore": 170}
+{"timesteps_total": 205200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 98892.183, "num_steps_sampled": 205200, "update_time_ms": 2.217, "num_steps_trained": 205200, "load_time_ms": 0.621, "default": {"kl": 0.015521807596087456, "cur_lr": 4.999999873689376e-05, "entropy": 14.46370792388916, "total_loss": 60.55887222290039, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1447797566652298, "vf_explained_var": 0.9552225470542908, "vf_loss": 60.68794250488281}, "grad_time_ms": 763.954}, "pid": 3934253, "time_total_s": 19972.380245923996, "episode_reward_mean": -155.33247669421817, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -171.77568078754396, "policy_reward_mean": {}, "episodes_total": 4104, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -138.59291754226575, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_20-09-25", "training_iteration": 171, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756490965, "episode_len_mean": 50.0, "timesteps_since_restore": 205200, "time_since_restore": 19972.380245923996, "time_this_iter_s": 94.49576473236084, "iterations_since_restore": 171}
+{"timesteps_total": 206400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 100614.225, "num_steps_sampled": 206400, "update_time_ms": 2.248, "num_steps_trained": 206400, "load_time_ms": 0.622, "default": {"kl": 0.015789611265063286, "cur_lr": 4.999999873689376e-05, "entropy": 14.3331298828125, "total_loss": 48.4068717956543, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13671469688415527, "vf_explained_var": 0.9638553261756897, "vf_loss": 48.5275993347168}, "grad_time_ms": 775.318}, "pid": 3934253, "time_total_s": 20080.128808498383, "episode_reward_mean": -155.01792419325568, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -171.77568078754396, "policy_reward_mean": {}, "episodes_total": 4128, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -144.12471496163798, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_20-11-13", "training_iteration": 172, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756491073, "episode_len_mean": 50.0, "timesteps_since_restore": 206400, "time_since_restore": 20080.128808498383, "time_this_iter_s": 107.7485625743866, "iterations_since_restore": 172}
+{"timesteps_total": 207600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 98688.004, "num_steps_sampled": 207600, "update_time_ms": 2.24, "num_steps_trained": 207600, "load_time_ms": 0.622, "default": {"kl": 0.014612250961363316, "cur_lr": 4.999999873689376e-05, "entropy": 14.156961441040039, "total_loss": 42.88018035888672, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11420790106058121, "vf_explained_var": 0.9699710011482239, "vf_loss": 42.97959899902344}, "grad_time_ms": 761.07}, "pid": 3934253, "time_total_s": 20173.1121134758, "episode_reward_mean": -155.37836022919845, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -171.77568078754396, "policy_reward_mean": {}, "episodes_total": 4152, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -144.12471496163798, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_20-12-46", "training_iteration": 173, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756491166, "episode_len_mean": 50.0, "timesteps_since_restore": 207600, "time_since_restore": 20173.1121134758, "time_this_iter_s": 92.98330497741699, "iterations_since_restore": 173}
+{"timesteps_total": 208800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 99523.342, "num_steps_sampled": 208800, "update_time_ms": 2.206, "num_steps_trained": 208800, "load_time_ms": 0.621, "default": {"kl": 0.014946643263101578, "cur_lr": 4.999999873689376e-05, "entropy": 14.175942420959473, "total_loss": 57.44399642944336, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13504861295223236, "vf_explained_var": 0.9574512839317322, "vf_loss": 57.56391143798828}, "grad_time_ms": 765.866}, "pid": 3934253, "time_total_s": 20274.651047468185, "episode_reward_mean": -155.56842982858154, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -185.3305242842277, "policy_reward_mean": {}, "episodes_total": 4176, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -144.12471496163798, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_20-14-27", "training_iteration": 174, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756491267, "episode_len_mean": 50.0, "timesteps_since_restore": 208800, "time_since_restore": 20274.651047468185, "time_this_iter_s": 101.53893399238586, "iterations_since_restore": 174}
+{"timesteps_total": 210000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 98719.862, "num_steps_sampled": 210000, "update_time_ms": 2.231, "num_steps_trained": 210000, "load_time_ms": 0.622, "default": {"kl": 0.014225161634385586, "cur_lr": 4.999999873689376e-05, "entropy": 14.324098587036133, "total_loss": 59.64179229736328, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12911520898342133, "vf_explained_var": 0.9586576819419861, "vf_loss": 59.75650405883789}, "grad_time_ms": 739.584}, "pid": 3934253, "time_total_s": 20369.012630462646, "episode_reward_mean": -155.78960833378852, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -185.3305242842277, "policy_reward_mean": {}, "episodes_total": 4200, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -144.12471496163798, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_20-16-02", "training_iteration": 175, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756491362, "episode_len_mean": 50.0, "timesteps_since_restore": 210000, "time_since_restore": 20369.012630462646, "time_this_iter_s": 94.36158299446106, "iterations_since_restore": 175}
+{"timesteps_total": 211200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 98927.785, "num_steps_sampled": 211200, "update_time_ms": 2.238, "num_steps_trained": 211200, "load_time_ms": 0.621, "default": {"kl": 0.015747396275401115, "cur_lr": 4.999999873689376e-05, "entropy": 14.27517032623291, "total_loss": 35.0083122253418, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.137986421585083, "vf_explained_var": 0.9727128148078918, "vf_loss": 35.13035202026367}, "grad_time_ms": 746.11}, "pid": 3934253, "time_total_s": 20460.81170296669, "episode_reward_mean": -155.69055132115284, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -185.3305242842277, "policy_reward_mean": {}, "episodes_total": 4224, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -148.52867451145093, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_20-17-33", "training_iteration": 176, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756491453, "episode_len_mean": 50.0, "timesteps_since_restore": 211200, "time_since_restore": 20460.81170296669, "time_this_iter_s": 91.79907250404358, "iterations_since_restore": 176}
+{"timesteps_total": 212400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94410.29, "num_steps_sampled": 212400, "update_time_ms": 2.289, "num_steps_trained": 212400, "load_time_ms": 0.621, "default": {"kl": 0.01412759255617857, "cur_lr": 4.999999873689376e-05, "entropy": 14.326051712036133, "total_loss": 44.30991744995117, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12913967669010162, "vf_explained_var": 0.9653099775314331, "vf_loss": 44.42475891113281}, "grad_time_ms": 758.482}, "pid": 3934253, "time_total_s": 20540.877601861954, "episode_reward_mean": -155.4291076016693, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -185.3305242842277, "policy_reward_mean": {}, "episodes_total": 4248, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -148.52867451145093, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_20-18-53", "training_iteration": 177, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756491533, "episode_len_mean": 50.0, "timesteps_since_restore": 212400, "time_since_restore": 20540.877601861954, "time_this_iter_s": 80.06589889526367, "iterations_since_restore": 177}
+{"timesteps_total": 213600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94658.357, "num_steps_sampled": 213600, "update_time_ms": 2.329, "num_steps_trained": 213600, "load_time_ms": 0.617, "default": {"kl": 0.014271329157054424, "cur_lr": 4.999999873689376e-05, "entropy": 14.212542533874512, "total_loss": 57.12761688232422, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12536929547786713, "vf_explained_var": 0.9578894376754761, "vf_loss": 57.23854064941406}, "grad_time_ms": 738.686}, "pid": 3934253, "time_total_s": 20645.73306274414, "episode_reward_mean": -155.17733970045182, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -175.80811120532408, "policy_reward_mean": {}, "episodes_total": 4272, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -148.15369681094623, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_20-20-38", "training_iteration": 178, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756491638, "episode_len_mean": 50.0, "timesteps_since_restore": 213600, "time_since_restore": 20645.73306274414, "time_this_iter_s": 104.85546088218689, "iterations_since_restore": 178}
+{"timesteps_total": 214800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94257.458, "num_steps_sampled": 214800, "update_time_ms": 2.356, "num_steps_trained": 214800, "load_time_ms": 0.602, "default": {"kl": 0.014027887023985386, "cur_lr": 4.999999873689376e-05, "entropy": 14.08896255493164, "total_loss": 54.14302062988281, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13360297679901123, "vf_explained_var": 0.9601472020149231, "vf_loss": 54.26241683959961}, "grad_time_ms": 753.871}, "pid": 3934253, "time_total_s": 20730.234143018723, "episode_reward_mean": -154.9975954160479, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -174.65220154558435, "policy_reward_mean": {}, "episodes_total": 4296, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -145.09918014006897, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_20-22-03", "training_iteration": 179, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756491723, "episode_len_mean": 50.0, "timesteps_since_restore": 214800, "time_since_restore": 20730.234143018723, "time_this_iter_s": 84.50108027458191, "iterations_since_restore": 179}
+{"timesteps_total": 216000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94974.4, "num_steps_sampled": 216000, "update_time_ms": 2.376, "num_steps_trained": 216000, "load_time_ms": 0.604, "default": {"kl": 0.015153449028730392, "cur_lr": 4.999999873689376e-05, "entropy": 14.264097213745117, "total_loss": 61.21241760253906, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13657930493354797, "vf_explained_var": 0.9583113789558411, "vf_loss": 61.33365249633789}, "grad_time_ms": 739.851}, "pid": 3934253, "time_total_s": 20835.10924553871, "episode_reward_mean": -155.41010977496163, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -187.8065284956767, "policy_reward_mean": {}, "episodes_total": 4320, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.37941258015238, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_20-23-48", "training_iteration": 180, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756491828, "episode_len_mean": 50.0, "timesteps_since_restore": 216000, "time_since_restore": 20835.10924553871, "time_this_iter_s": 104.87510251998901, "iterations_since_restore": 180}
+{"timesteps_total": 217200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93982.285, "num_steps_sampled": 217200, "update_time_ms": 2.417, "num_steps_trained": 217200, "load_time_ms": 0.606, "default": {"kl": 0.016186289489269257, "cur_lr": 4.999999873689376e-05, "entropy": 14.127467155456543, "total_loss": 64.62361145019531, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13219882547855377, "vf_explained_var": 0.9601544141769409, "vf_loss": 64.73941040039062}, "grad_time_ms": 749.797}, "pid": 3934253, "time_total_s": 20919.783405065536, "episode_reward_mean": -155.7737927614949, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -187.8065284956767, "policy_reward_mean": {}, "episodes_total": 4344, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.37941258015238, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_20-25-12", "training_iteration": 181, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756491912, "episode_len_mean": 50.0, "timesteps_since_restore": 217200, "time_since_restore": 20919.783405065536, "time_this_iter_s": 84.67415952682495, "iterations_since_restore": 181}
+{"timesteps_total": 218400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93673.265, "num_steps_sampled": 218400, "update_time_ms": 2.428, "num_steps_trained": 218400, "load_time_ms": 0.612, "default": {"kl": 0.016756556928157806, "cur_lr": 4.999999873689376e-05, "entropy": 14.09090805053711, "total_loss": 32.69347381591797, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13112007081508636, "vf_explained_var": 0.9762242436408997, "vf_loss": 32.8076286315918}, "grad_time_ms": 753.985}, "pid": 3934253, "time_total_s": 21024.484308958054, "episode_reward_mean": -155.6975634315122, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -187.8065284956767, "policy_reward_mean": {}, "episodes_total": 4368, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.37941258015238, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_20-26-57", "training_iteration": 182, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756492017, "episode_len_mean": 50.0, "timesteps_since_restore": 218400, "time_since_restore": 21024.484308958054, "time_this_iter_s": 104.70090389251709, "iterations_since_restore": 182}
+{"timesteps_total": 219600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 96073.239, "num_steps_sampled": 219600, "update_time_ms": 2.455, "num_steps_trained": 219600, "load_time_ms": 0.609, "default": {"kl": 0.014003668911755085, "cur_lr": 4.999999873689376e-05, "entropy": 14.073108673095703, "total_loss": 43.03472137451172, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12060434371232986, "vf_explained_var": 0.9658010005950928, "vf_loss": 43.14114761352539}, "grad_time_ms": 756.764}, "pid": 3934253, "time_total_s": 21141.494768619537, "episode_reward_mean": -155.79708932386183, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -187.8065284956767, "policy_reward_mean": {}, "episodes_total": 4392, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.37941258015238, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_20-28-54", "training_iteration": 183, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756492134, "episode_len_mean": 50.0, "timesteps_since_restore": 219600, "time_since_restore": 21141.494768619537, "time_this_iter_s": 117.01045966148376, "iterations_since_restore": 183}
+{"timesteps_total": 220800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93381.741, "num_steps_sampled": 220800, "update_time_ms": 2.479, "num_steps_trained": 220800, "load_time_ms": 0.607, "default": {"kl": 0.015979474410414696, "cur_lr": 4.999999873689376e-05, "entropy": 13.951452255249023, "total_loss": 28.970035552978516, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13051539659500122, "vf_explained_var": 0.9753679633140564, "vf_loss": 29.084373474121094}, "grad_time_ms": 750.692}, "pid": 3934253, "time_total_s": 21216.059225797653, "episode_reward_mean": -155.2159485927954, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -174.74615890433003, "policy_reward_mean": {}, "episodes_total": 4416, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -149.9222426574402, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_20-30-09", "training_iteration": 184, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756492209, "episode_len_mean": 50.0, "timesteps_since_restore": 220800, "time_since_restore": 21216.059225797653, "time_this_iter_s": 74.56445717811584, "iterations_since_restore": 184}
+{"timesteps_total": 222000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 96436.116, "num_steps_sampled": 222000, "update_time_ms": 2.503, "num_steps_trained": 222000, "load_time_ms": 0.606, "default": {"kl": 0.015772182494401932, "cur_lr": 4.999999873689376e-05, "entropy": 14.0059232711792, "total_loss": 60.55994415283203, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1361880898475647, "vf_explained_var": 0.9553515315055847, "vf_loss": 60.68016052246094}, "grad_time_ms": 762.586}, "pid": 3934253, "time_total_s": 21341.083225011826, "episode_reward_mean": -154.90547833395576, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -180.30431492076218, "policy_reward_mean": {}, "episodes_total": 4440, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.678197164373, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_20-32-14", "training_iteration": 185, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756492334, "episode_len_mean": 50.0, "timesteps_since_restore": 222000, "time_since_restore": 21341.083225011826, "time_this_iter_s": 125.02399921417236, "iterations_since_restore": 185}
+{"timesteps_total": 223200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95140.264, "num_steps_sampled": 223200, "update_time_ms": 2.507, "num_steps_trained": 223200, "load_time_ms": 0.605, "default": {"kl": 0.015124676749110222, "cur_lr": 4.999999873689376e-05, "entropy": 13.924979209899902, "total_loss": 37.755821228027344, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13534162938594818, "vf_explained_var": 0.9738060832023621, "vf_loss": 37.87584686279297}, "grad_time_ms": 752.513}, "pid": 3934253, "time_total_s": 21419.822728157043, "episode_reward_mean": -154.85165366221167, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -180.30431492076218, "policy_reward_mean": {}, "episodes_total": 4464, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.678197164373, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_20-33-33", "training_iteration": 186, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756492413, "episode_len_mean": 50.0, "timesteps_since_restore": 223200, "time_since_restore": 21419.822728157043, "time_this_iter_s": 78.7395031452179, "iterations_since_restore": 186}
+{"timesteps_total": 224400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 97230.341, "num_steps_sampled": 224400, "update_time_ms": 2.447, "num_steps_trained": 224400, "load_time_ms": 0.603, "default": {"kl": 0.01550869271159172, "cur_lr": 4.999999873689376e-05, "entropy": 14.100944519042969, "total_loss": 33.599151611328125, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1279132217168808, "vf_explained_var": 0.9754032492637634, "vf_loss": 33.711360931396484}, "grad_time_ms": 745.508}, "pid": 3934253, "time_total_s": 21520.718727827072, "episode_reward_mean": -154.45393718739763, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -180.30431492076218, "policy_reward_mean": {}, "episodes_total": 4488, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.678197164373, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_20-35-13", "training_iteration": 187, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756492513, "episode_len_mean": 50.0, "timesteps_since_restore": 224400, "time_since_restore": 21520.718727827072, "time_this_iter_s": 100.89599967002869, "iterations_since_restore": 187}
+{"timesteps_total": 225600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 96492.054, "num_steps_sampled": 225600, "update_time_ms": 2.439, "num_steps_trained": 225600, "load_time_ms": 0.606, "default": {"kl": 0.015070527791976929, "cur_lr": 4.999999873689376e-05, "entropy": 14.055828094482422, "total_loss": 31.14375114440918, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1401119828224182, "vf_explained_var": 0.9760143756866455, "vf_loss": 31.26860237121582}, "grad_time_ms": 770.442}, "pid": 3934253, "time_total_s": 21618.440562963486, "episode_reward_mean": -154.46611208847494, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -180.30431492076218, "policy_reward_mean": {}, "episodes_total": 4512, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.678197164373, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_20-36-51", "training_iteration": 188, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756492611, "episode_len_mean": 50.0, "timesteps_since_restore": 225600, "time_since_restore": 21618.440562963486, "time_this_iter_s": 97.72183513641357, "iterations_since_restore": 188}
+{"timesteps_total": 226800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 97484.15, "num_steps_sampled": 226800, "update_time_ms": 2.429, "num_steps_trained": 226800, "load_time_ms": 0.609, "default": {"kl": 0.017055794596672058, "cur_lr": 4.999999873689376e-05, "entropy": 13.9029541015625, "total_loss": 34.64968490600586, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13037782907485962, "vf_explained_var": 0.975419282913208, "vf_loss": 34.76279830932617}, "grad_time_ms": 758.748}, "pid": 3934253, "time_total_s": 21712.745859384537, "episode_reward_mean": -154.22806128008008, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.64550611361074, "policy_reward_mean": {}, "episodes_total": 4536, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -150.59684708886275, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_20-38-26", "training_iteration": 189, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756492706, "episode_len_mean": 50.0, "timesteps_since_restore": 226800, "time_since_restore": 21712.745859384537, "time_this_iter_s": 94.30529642105103, "iterations_since_restore": 189}
+{"timesteps_total": 228000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 98561.022, "num_steps_sampled": 228000, "update_time_ms": 2.417, "num_steps_trained": 228000, "load_time_ms": 0.636, "default": {"kl": 0.015165035612881184, "cur_lr": 4.999999873689376e-05, "entropy": 13.919445037841797, "total_loss": 50.03436279296875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13474352657794952, "vf_explained_var": 0.9623463153839111, "vf_loss": 50.153751373291016}, "grad_time_ms": 757.404}, "pid": 3934253, "time_total_s": 21828.377017736435, "episode_reward_mean": -153.72495777307597, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.64550611361074, "policy_reward_mean": {}, "episodes_total": 4560, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -138.56659806083067, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_20-40-21", "training_iteration": 190, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756492821, "episode_len_mean": 50.0, "timesteps_since_restore": 228000, "time_since_restore": 21828.377017736435, "time_this_iter_s": 115.6311583518982, "iterations_since_restore": 190}
+{"timesteps_total": 229200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 101165.068, "num_steps_sampled": 229200, "update_time_ms": 2.484, "num_steps_trained": 229200, "load_time_ms": 0.661, "default": {"kl": 0.015464269556105137, "cur_lr": 4.999999873689376e-05, "entropy": 13.737651824951172, "total_loss": 23.389896392822266, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13375920057296753, "vf_explained_var": 0.9811168313026428, "vf_loss": 23.50799560546875}, "grad_time_ms": 738.619}, "pid": 3934253, "time_total_s": 21938.90476822853, "episode_reward_mean": -153.30432291500162, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.64550611361074, "policy_reward_mean": {}, "episodes_total": 4584, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -138.56659806083067, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_20-42-12", "training_iteration": 191, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756492932, "episode_len_mean": 50.0, "timesteps_since_restore": 229200, "time_since_restore": 21938.90476822853, "time_this_iter_s": 110.52775049209595, "iterations_since_restore": 191}
+{"timesteps_total": 230400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 101400.198, "num_steps_sampled": 230400, "update_time_ms": 2.456, "num_steps_trained": 230400, "load_time_ms": 0.657, "default": {"kl": 0.01649424433708191, "cur_lr": 4.999999873689376e-05, "entropy": 13.909981727600098, "total_loss": 30.5107421875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1406993716955185, "vf_explained_var": 0.9759019017219543, "vf_loss": 30.63473892211914}, "grad_time_ms": 709.028}, "pid": 3934253, "time_total_s": 22045.659630537033, "episode_reward_mean": -153.2856807334686, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -169.60459859319087, "policy_reward_mean": {}, "episodes_total": 4608, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -138.56659806083067, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_20-43-59", "training_iteration": 192, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756493039, "episode_len_mean": 50.0, "timesteps_since_restore": 230400, "time_since_restore": 22045.659630537033, "time_this_iter_s": 106.7548623085022, "iterations_since_restore": 192}
+{"timesteps_total": 231600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 99165.403, "num_steps_sampled": 231600, "update_time_ms": 2.423, "num_steps_trained": 231600, "load_time_ms": 0.655, "default": {"kl": 0.014766073785722256, "cur_lr": 4.999999873689376e-05, "entropy": 13.603525161743164, "total_loss": 46.67988586425781, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12098463624715805, "vf_explained_var": 0.9633685350418091, "vf_loss": 46.78591537475586}, "grad_time_ms": 715.39}, "pid": 3934253, "time_total_s": 22140.386114120483, "episode_reward_mean": -153.8011387577607, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -169.60459859319087, "policy_reward_mean": {}, "episodes_total": 4632, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -138.56659806083067, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_20-45-33", "training_iteration": 193, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756493133, "episode_len_mean": 50.0, "timesteps_since_restore": 231600, "time_since_restore": 22140.386114120483, "time_this_iter_s": 94.72648358345032, "iterations_since_restore": 193}
+{"timesteps_total": 232800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 103001.441, "num_steps_sampled": 232800, "update_time_ms": 2.447, "num_steps_trained": 232800, "load_time_ms": 0.673, "default": {"kl": 0.016012491658329964, "cur_lr": 4.999999873689376e-05, "entropy": 13.908878326416016, "total_loss": 31.52902603149414, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12783116102218628, "vf_explained_var": 0.9771274924278259, "vf_loss": 31.64064598083496}, "grad_time_ms": 727.675}, "pid": 3934253, "time_total_s": 22253.433773756027, "episode_reward_mean": -154.14178573010508, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -169.60459859319087, "policy_reward_mean": {}, "episodes_total": 4656, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -138.56659806083067, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_20-47-26", "training_iteration": 194, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756493246, "episode_len_mean": 50.0, "timesteps_since_restore": 232800, "time_since_restore": 22253.433773756027, "time_this_iter_s": 113.04765963554382, "iterations_since_restore": 194}
+{"timesteps_total": 234000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 101708.056, "num_steps_sampled": 234000, "update_time_ms": 2.457, "num_steps_trained": 234000, "load_time_ms": 0.669, "default": {"kl": 0.015001552179455757, "cur_lr": 4.999999873689376e-05, "entropy": 13.77999496459961, "total_loss": 35.390708923339844, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12304878234863281, "vf_explained_var": 0.9735833406448364, "vf_loss": 35.498565673828125}, "grad_time_ms": 728.105}, "pid": 3934253, "time_total_s": 22365.52901148796, "episode_reward_mean": -154.76540725376347, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -169.60459859319087, "policy_reward_mean": {}, "episodes_total": 4680, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -150.01169480783062, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_20-49-18", "training_iteration": 195, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756493358, "episode_len_mean": 50.0, "timesteps_since_restore": 234000, "time_since_restore": 22365.52901148796, "time_this_iter_s": 112.0952377319336, "iterations_since_restore": 195}
+{"timesteps_total": 235200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 101804.077, "num_steps_sampled": 235200, "update_time_ms": 2.433, "num_steps_trained": 235200, "load_time_ms": 0.671, "default": {"kl": 0.01647140271961689, "cur_lr": 4.999999873689376e-05, "entropy": 13.635623931884766, "total_loss": 52.579586029052734, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13822194933891296, "vf_explained_var": 0.9653752446174622, "vf_loss": 52.70112609863281}, "grad_time_ms": 738.72}, "pid": 3934253, "time_total_s": 22445.33472752571, "episode_reward_mean": -154.83888058703687, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -177.26881957412837, "policy_reward_mean": {}, "episodes_total": 4704, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -146.65800145858734, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_20-50-38", "training_iteration": 196, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756493438, "episode_len_mean": 50.0, "timesteps_since_restore": 235200, "time_since_restore": 22445.33472752571, "time_this_iter_s": 79.80571603775024, "iterations_since_restore": 196}
+{"timesteps_total": 236400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 102468.947, "num_steps_sampled": 236400, "update_time_ms": 2.481, "num_steps_trained": 236400, "load_time_ms": 0.678, "default": {"kl": 0.015982117503881454, "cur_lr": 4.999999873689376e-05, "entropy": 13.740926742553711, "total_loss": 18.595247268676758, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12767238914966583, "vf_explained_var": 0.9844462275505066, "vf_loss": 18.706737518310547}, "grad_time_ms": 738.975}, "pid": 3934253, "time_total_s": 22552.882929325104, "episode_reward_mean": -154.33678146760286, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -177.26881957412837, "policy_reward_mean": {}, "episodes_total": 4728, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -146.65800145858734, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_20-52-26", "training_iteration": 197, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756493546, "episode_len_mean": 50.0, "timesteps_since_restore": 236400, "time_since_restore": 22552.882929325104, "time_this_iter_s": 107.5482017993927, "iterations_since_restore": 197}
+{"timesteps_total": 237600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 103790.417, "num_steps_sampled": 237600, "update_time_ms": 2.534, "num_steps_trained": 237600, "load_time_ms": 0.673, "default": {"kl": 0.014276721514761448, "cur_lr": 4.999999873689376e-05, "entropy": 13.760214805603027, "total_loss": 41.77705383300781, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1205190122127533, "vf_explained_var": 0.969501793384552, "vf_loss": 41.88311767578125}, "grad_time_ms": 718.444}, "pid": 3934253, "time_total_s": 22663.61433315277, "episode_reward_mean": -154.44646720506114, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -201.16762912816088, "policy_reward_mean": {}, "episodes_total": 4752, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -146.65800145858734, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_20-54-17", "training_iteration": 198, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756493657, "episode_len_mean": 50.0, "timesteps_since_restore": 237600, "time_since_restore": 22663.61433315277, "time_this_iter_s": 110.73140382766724, "iterations_since_restore": 198}
+{"timesteps_total": 238800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 104968.257, "num_steps_sampled": 238800, "update_time_ms": 2.498, "num_steps_trained": 238800, "load_time_ms": 0.673, "default": {"kl": 0.01621420495212078, "cur_lr": 4.999999873689376e-05, "entropy": 13.574341773986816, "total_loss": 46.5653076171875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13176356256008148, "vf_explained_var": 0.9670212268829346, "vf_loss": 46.68064880371094}, "grad_time_ms": 720.256}, "pid": 3934253, "time_total_s": 22769.716091156006, "episode_reward_mean": -154.59433723746173, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -201.16762912816088, "policy_reward_mean": {}, "episodes_total": 4776, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -146.65800145858734, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_20-56-03", "training_iteration": 199, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756493763, "episode_len_mean": 50.0, "timesteps_since_restore": 238800, "time_since_restore": 22769.716091156006, "time_this_iter_s": 106.10175800323486, "iterations_since_restore": 199}
+{"timesteps_total": 240000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 105216.055, "num_steps_sampled": 240000, "update_time_ms": 2.462, "num_steps_trained": 240000, "load_time_ms": 0.641, "default": {"kl": 0.014780566096305847, "cur_lr": 4.999999873689376e-05, "entropy": 13.378012657165527, "total_loss": 36.66807174682617, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14128637313842773, "vf_explained_var": 0.9747660756111145, "vf_loss": 36.79439163208008}, "grad_time_ms": 725.314}, "pid": 3934253, "time_total_s": 22887.873666524887, "episode_reward_mean": -154.4375084794087, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -201.16762912816088, "policy_reward_mean": {}, "episodes_total": 4800, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -149.0978238513307, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_20-58-01", "training_iteration": 200, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756493881, "episode_len_mean": 50.0, "timesteps_since_restore": 240000, "time_since_restore": 22887.873666524887, "time_this_iter_s": 118.15757536888123, "iterations_since_restore": 200}
+{"timesteps_total": 241200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 104910.777, "num_steps_sampled": 241200, "update_time_ms": 2.452, "num_steps_trained": 241200, "load_time_ms": 0.606, "default": {"kl": 0.013666907325387001, "cur_lr": 4.999999873689376e-05, "entropy": 13.640439987182617, "total_loss": 49.74296188354492, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12500447034835815, "vf_explained_var": 0.9647335410118103, "vf_loss": 49.85413360595703}, "grad_time_ms": 735.828}, "pid": 3934253, "time_total_s": 22995.453704595566, "episode_reward_mean": -154.5080193255974, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -201.16762912816088, "policy_reward_mean": {}, "episodes_total": 4824, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -146.52584462153817, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_20-59-48", "training_iteration": 201, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756493988, "episode_len_mean": 50.0, "timesteps_since_restore": 241200, "time_since_restore": 22995.453704595566, "time_this_iter_s": 107.58003807067871, "iterations_since_restore": 201}
+{"timesteps_total": 242400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 102243.239, "num_steps_sampled": 242400, "update_time_ms": 2.469, "num_steps_trained": 242400, "load_time_ms": 0.604, "default": {"kl": 0.015573102980852127, "cur_lr": 4.999999873689376e-05, "entropy": 13.60585880279541, "total_loss": 44.72560501098633, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11865009367465973, "vf_explained_var": 0.9653467535972595, "vf_loss": 44.828487396240234}, "grad_time_ms": 732.415}, "pid": 3934253, "time_total_s": 23075.499824762344, "episode_reward_mean": -154.390515036229, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -201.16762912816088, "policy_reward_mean": {}, "episodes_total": 4848, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -144.64655797683017, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_21-01-09", "training_iteration": 202, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756494069, "episode_len_mean": 50.0, "timesteps_since_restore": 242400, "time_since_restore": 23075.499824762344, "time_this_iter_s": 80.04612016677856, "iterations_since_restore": 202}
+{"timesteps_total": 243600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 103775.09, "num_steps_sampled": 243600, "update_time_ms": 2.511, "num_steps_trained": 243600, "load_time_ms": 0.607, "default": {"kl": 0.01503191888332367, "cur_lr": 4.999999873689376e-05, "entropy": 13.590577125549316, "total_loss": 42.913047790527344, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11701390147209167, "vf_explained_var": 0.9663113355636597, "vf_loss": 43.01484298706055}, "grad_time_ms": 733.825}, "pid": 3934253, "time_total_s": 23185.55954527855, "episode_reward_mean": -153.60317569799324, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.37361769890444, "policy_reward_mean": {}, "episodes_total": 4872, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -138.32310226038112, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_21-02-59", "training_iteration": 203, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756494179, "episode_len_mean": 50.0, "timesteps_since_restore": 243600, "time_since_restore": 23185.55954527855, "time_this_iter_s": 110.05972051620483, "iterations_since_restore": 203}
+{"timesteps_total": 244800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 102549.786, "num_steps_sampled": 244800, "update_time_ms": 2.504, "num_steps_trained": 244800, "load_time_ms": 0.591, "default": {"kl": 0.016744563356041908, "cur_lr": 4.999999873689376e-05, "entropy": 13.430876731872559, "total_loss": 37.94887924194336, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13519436120986938, "vf_explained_var": 0.9705941677093506, "vf_loss": 38.06712341308594}, "grad_time_ms": 721.255}, "pid": 3934253, "time_total_s": 23286.228005886078, "episode_reward_mean": -153.52169316652558, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -168.51605431528077, "policy_reward_mean": {}, "episodes_total": 4896, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -138.32310226038112, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_21-04-39", "training_iteration": 204, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756494279, "episode_len_mean": 50.0, "timesteps_since_restore": 244800, "time_since_restore": 23286.228005886078, "time_this_iter_s": 100.66846060752869, "iterations_since_restore": 204}
+{"timesteps_total": 246000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 101212.226, "num_steps_sampled": 246000, "update_time_ms": 2.459, "num_steps_trained": 246000, "load_time_ms": 0.596, "default": {"kl": 0.01710333861410618, "cur_lr": 4.999999873689376e-05, "entropy": 13.417792320251465, "total_loss": 38.24384689331055, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.144325852394104, "vf_explained_var": 0.9721401929855347, "vf_loss": 38.37085723876953}, "grad_time_ms": 737.0}, "pid": 3934253, "time_total_s": 23385.1042368412, "episode_reward_mean": -153.58170670594885, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -168.51605431528077, "policy_reward_mean": {}, "episodes_total": 4920, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -138.32310226038112, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_21-06-18", "training_iteration": 205, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756494378, "episode_len_mean": 50.0, "timesteps_since_restore": 246000, "time_since_restore": 23385.1042368412, "time_this_iter_s": 98.8762309551239, "iterations_since_restore": 205}
+{"timesteps_total": 247200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 103806.858, "num_steps_sampled": 247200, "update_time_ms": 2.565, "num_steps_trained": 247200, "load_time_ms": 0.614, "default": {"kl": 0.015271955169737339, "cur_lr": 4.999999873689376e-05, "entropy": 13.395512580871582, "total_loss": 54.9863166809082, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14978620409965515, "vf_explained_var": 0.9647194743156433, "vf_loss": 55.12063980102539}, "grad_time_ms": 729.754}, "pid": 3934253, "time_total_s": 23490.784667491913, "episode_reward_mean": -153.93582146379998, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -168.51605431528077, "policy_reward_mean": {}, "episodes_total": 4944, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -138.32310226038112, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_21-08-04", "training_iteration": 206, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756494484, "episode_len_mean": 50.0, "timesteps_since_restore": 247200, "time_since_restore": 23490.784667491913, "time_this_iter_s": 105.68043065071106, "iterations_since_restore": 206}
+{"timesteps_total": 248400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 103302.599, "num_steps_sampled": 248400, "update_time_ms": 2.567, "num_steps_trained": 248400, "load_time_ms": 0.611, "default": {"kl": 0.013181351125240326, "cur_lr": 4.999999873689376e-05, "entropy": 13.397079467773438, "total_loss": 47.21424865722656, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1278305947780609, "vf_explained_var": 0.9659013748168945, "vf_loss": 47.328731536865234}, "grad_time_ms": 724.667}, "pid": 3934253, "time_total_s": 23593.239156246185, "episode_reward_mean": -154.25386999328757, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -181.2020651411598, "policy_reward_mean": {}, "episodes_total": 4968, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -140.24452928526324, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_21-09-46", "training_iteration": 207, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756494586, "episode_len_mean": 50.0, "timesteps_since_restore": 248400, "time_since_restore": 23593.239156246185, "time_this_iter_s": 102.45448875427246, "iterations_since_restore": 207}
+{"timesteps_total": 249600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 102177.445, "num_steps_sampled": 249600, "update_time_ms": 2.522, "num_steps_trained": 249600, "load_time_ms": 0.621, "default": {"kl": 0.014789672568440437, "cur_lr": 4.999999873689376e-05, "entropy": 13.347824096679688, "total_loss": 56.15548324584961, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.139645516872406, "vf_explained_var": 0.9593076705932617, "vf_loss": 56.2801513671875}, "grad_time_ms": 736.067}, "pid": 3934253, "time_total_s": 23692.833278894424, "episode_reward_mean": -154.4865686886029, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -181.2020651411598, "policy_reward_mean": {}, "episodes_total": 4992, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -146.07667147403822, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_21-11-26", "training_iteration": 208, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756494686, "episode_len_mean": 50.0, "timesteps_since_restore": 249600, "time_since_restore": 23692.833278894424, "time_this_iter_s": 99.59412264823914, "iterations_since_restore": 208}
+{"timesteps_total": 250800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 101683.031, "num_steps_sampled": 250800, "update_time_ms": 2.546, "num_steps_trained": 250800, "load_time_ms": 0.626, "default": {"kl": 0.015958771109580994, "cur_lr": 4.999999873689376e-05, "entropy": 13.283158302307129, "total_loss": 32.21907424926758, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14139731228351593, "vf_explained_var": 0.9757466912269592, "vf_loss": 32.34431457519531}, "grad_time_ms": 744.585}, "pid": 3934253, "time_total_s": 23794.076202869415, "episode_reward_mean": -154.57491315828824, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -181.2020651411598, "policy_reward_mean": {}, "episodes_total": 5016, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -146.07667147403822, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_21-13-07", "training_iteration": 209, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756494787, "episode_len_mean": 50.0, "timesteps_since_restore": 250800, "time_since_restore": 23794.076202869415, "time_this_iter_s": 101.24292397499084, "iterations_since_restore": 209}
+{"timesteps_total": 252000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 100320.453, "num_steps_sampled": 252000, "update_time_ms": 2.589, "num_steps_trained": 252000, "load_time_ms": 0.627, "default": {"kl": 0.016961511224508286, "cur_lr": 4.999999873689376e-05, "entropy": 13.437080383300781, "total_loss": 28.432422637939453, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1382811963558197, "vf_explained_var": 0.9765098094940186, "vf_loss": 28.55352783203125}, "grad_time_ms": 750.972}, "pid": 3934253, "time_total_s": 23898.673114538193, "episode_reward_mean": -154.64418535655625, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -181.2020651411598, "policy_reward_mean": {}, "episodes_total": 5040, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -144.40869175206473, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_21-14-52", "training_iteration": 210, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756494892, "episode_len_mean": 50.0, "timesteps_since_restore": 252000, "time_since_restore": 23898.673114538193, "time_this_iter_s": 104.59691166877747, "iterations_since_restore": 210}
+{"timesteps_total": 253200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 101961.698, "num_steps_sampled": 253200, "update_time_ms": 2.533, "num_steps_trained": 253200, "load_time_ms": 0.649, "default": {"kl": 0.015320269390940666, "cur_lr": 4.999999873689376e-05, "entropy": 13.378397941589355, "total_loss": 38.70890808105469, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13138148188591003, "vf_explained_var": 0.9688937067985535, "vf_loss": 38.82477951049805}, "grad_time_ms": 745.729}, "pid": 3934253, "time_total_s": 24022.612620592117, "episode_reward_mean": -154.84949540505792, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -177.23204123604674, "policy_reward_mean": {}, "episodes_total": 5064, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -144.40869175206473, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_21-16-56", "training_iteration": 211, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756495016, "episode_len_mean": 50.0, "timesteps_since_restore": 253200, "time_since_restore": 24022.612620592117, "time_this_iter_s": 123.93950605392456, "iterations_since_restore": 211}
+{"timesteps_total": 254400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 104314.751, "num_steps_sampled": 254400, "update_time_ms": 2.506, "num_steps_trained": 254400, "load_time_ms": 0.649, "default": {"kl": 0.015276423655450344, "cur_lr": 4.999999873689376e-05, "entropy": 13.541495323181152, "total_loss": 35.86064910888672, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.135739266872406, "vf_explained_var": 0.9777436852455139, "vf_loss": 35.980918884277344}, "grad_time_ms": 759.486}, "pid": 3934253, "time_total_s": 24126.327362060547, "episode_reward_mean": -154.75495105972402, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -175.558753189674, "policy_reward_mean": {}, "episodes_total": 5088, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.65037420939933, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_21-18-40", "training_iteration": 212, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756495120, "episode_len_mean": 50.0, "timesteps_since_restore": 254400, "time_since_restore": 24126.327362060547, "time_this_iter_s": 103.71474146842957, "iterations_since_restore": 212}
+{"timesteps_total": 255600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 106540.291, "num_steps_sampled": 255600, "update_time_ms": 2.506, "num_steps_trained": 255600, "load_time_ms": 0.658, "default": {"kl": 0.014562960714101791, "cur_lr": 4.999999873689376e-05, "entropy": 13.229193687438965, "total_loss": 27.504392623901367, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12833461165428162, "vf_explained_var": 0.9786010384559631, "vf_loss": 27.617982864379883}, "grad_time_ms": 736.482}, "pid": 3934253, "time_total_s": 24258.412103414536, "episode_reward_mean": -154.48851868906385, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -175.558753189674, "policy_reward_mean": {}, "episodes_total": 5112, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.65037420939933, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_21-20-52", "training_iteration": 213, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756495252, "episode_len_mean": 50.0, "timesteps_since_restore": 255600, "time_since_restore": 24258.412103414536, "time_this_iter_s": 132.08474135398865, "iterations_since_restore": 213}
+{"timesteps_total": 256800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 105849.683, "num_steps_sampled": 256800, "update_time_ms": 2.509, "num_steps_trained": 256800, "load_time_ms": 0.659, "default": {"kl": 0.014660666696727276, "cur_lr": 4.999999873689376e-05, "entropy": 13.363061904907227, "total_loss": 40.02897644042969, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13153332471847534, "vf_explained_var": 0.9764943718910217, "vf_loss": 40.14567184448242}, "grad_time_ms": 728.893}, "pid": 3934253, "time_total_s": 24352.10121202469, "episode_reward_mean": -154.46203690448567, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -175.558753189674, "policy_reward_mean": {}, "episodes_total": 5136, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.65037420939933, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_21-22-25", "training_iteration": 214, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756495345, "episode_len_mean": 50.0, "timesteps_since_restore": 256800, "time_since_restore": 24352.10121202469, "time_this_iter_s": 93.6891086101532, "iterations_since_restore": 214}
+{"timesteps_total": 258000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 106030.498, "num_steps_sampled": 258000, "update_time_ms": 2.55, "num_steps_trained": 258000, "load_time_ms": 0.658, "default": {"kl": 0.015574107877910137, "cur_lr": 4.999999873689376e-05, "entropy": 13.24034595489502, "total_loss": 23.518882751464844, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13245254755020142, "vf_explained_var": 0.9813408255577087, "vf_loss": 23.63556671142578}, "grad_time_ms": 724.611}, "pid": 3934253, "time_total_s": 24452.742853164673, "episode_reward_mean": -153.56616856118634, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -175.558753189674, "policy_reward_mean": {}, "episodes_total": 5160, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.65037420939933, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_21-24-06", "training_iteration": 215, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756495446, "episode_len_mean": 50.0, "timesteps_since_restore": 258000, "time_since_restore": 24452.742853164673, "time_this_iter_s": 100.64164113998413, "iterations_since_restore": 215}
+{"timesteps_total": 259200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 106625.713, "num_steps_sampled": 259200, "update_time_ms": 2.448, "num_steps_trained": 259200, "load_time_ms": 0.639, "default": {"kl": 0.015010246075689793, "cur_lr": 4.999999873689376e-05, "entropy": 13.155643463134766, "total_loss": 42.44038009643555, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12642936408519745, "vf_explained_var": 0.9697035551071167, "vf_loss": 42.55160903930664}, "grad_time_ms": 724.094}, "pid": 3934253, "time_total_s": 24564.368947267532, "episode_reward_mean": -154.02823510503526, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -178.46962133035237, "policy_reward_mean": {}, "episodes_total": 5184, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.65037420939933, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_21-25-58", "training_iteration": 216, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756495558, "episode_len_mean": 50.0, "timesteps_since_restore": 259200, "time_since_restore": 24564.368947267532, "time_this_iter_s": 111.6260941028595, "iterations_since_restore": 216}
+{"timesteps_total": 260400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 106940.569, "num_steps_sampled": 260400, "update_time_ms": 2.416, "num_steps_trained": 260400, "load_time_ms": 0.636, "default": {"kl": 0.015397397801280022, "cur_lr": 4.999999873689376e-05, "entropy": 13.084989547729492, "total_loss": 37.89418029785156, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1234256848692894, "vf_explained_var": 0.9787766933441162, "vf_loss": 38.002017974853516}, "grad_time_ms": 719.41}, "pid": 3934253, "time_total_s": 24669.925053358078, "episode_reward_mean": -154.47670628352498, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -178.46962133035237, "policy_reward_mean": {}, "episodes_total": 5208, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -144.31778136586442, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_21-27-43", "training_iteration": 217, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756495663, "episode_len_mean": 50.0, "timesteps_since_restore": 260400, "time_since_restore": 24669.925053358078, "time_this_iter_s": 105.55610609054565, "iterations_since_restore": 217}
+{"timesteps_total": 261600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 107460.8, "num_steps_sampled": 261600, "update_time_ms": 2.437, "num_steps_trained": 261600, "load_time_ms": 0.636, "default": {"kl": 0.015226908959448338, "cur_lr": 4.999999873689376e-05, "entropy": 13.149221420288086, "total_loss": 47.839778900146484, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13463148474693298, "vf_explained_var": 0.9676254987716675, "vf_loss": 47.95899200439453}, "grad_time_ms": 716.797}, "pid": 3934253, "time_total_s": 24774.695830106735, "episode_reward_mean": -154.79985125114234, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -178.46962133035237, "policy_reward_mean": {}, "episodes_total": 5232, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.7615888181636, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_21-29-28", "training_iteration": 218, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756495768, "episode_len_mean": 50.0, "timesteps_since_restore": 261600, "time_since_restore": 24774.695830106735, "time_this_iter_s": 104.77077674865723, "iterations_since_restore": 218}
+{"timesteps_total": 262800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 109240.82, "num_steps_sampled": 262800, "update_time_ms": 2.408, "num_steps_trained": 262800, "load_time_ms": 0.629, "default": {"kl": 0.013625938445329666, "cur_lr": 4.999999873689376e-05, "entropy": 13.160884857177734, "total_loss": 38.04711151123047, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12461742758750916, "vf_explained_var": 0.9737904667854309, "vf_loss": 38.15793228149414}, "grad_time_ms": 707.953}, "pid": 3934253, "time_total_s": 24893.649383544922, "episode_reward_mean": -154.8291380370024, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -178.46962133035237, "policy_reward_mean": {}, "episodes_total": 5256, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.7615888181636, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_21-31-27", "training_iteration": 219, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756495887, "episode_len_mean": 50.0, "timesteps_since_restore": 262800, "time_since_restore": 24893.649383544922, "time_this_iter_s": 118.95355343818665, "iterations_since_restore": 219}
+{"timesteps_total": 264000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 106807.186, "num_steps_sampled": 264000, "update_time_ms": 2.388, "num_steps_trained": 264000, "load_time_ms": 0.63, "default": {"kl": 0.01404307596385479, "cur_lr": 4.999999873689376e-05, "entropy": 13.15298080444336, "total_loss": 49.057411193847656, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13220664858818054, "vf_explained_var": 0.965358316898346, "vf_loss": 49.17539978027344}, "grad_time_ms": 709.126}, "pid": 3934253, "time_total_s": 24973.92138981819, "episode_reward_mean": -154.6194946052812, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -172.94731992277121, "policy_reward_mean": {}, "episodes_total": 5280, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.7615888181636, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_21-32-47", "training_iteration": 220, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756495967, "episode_len_mean": 50.0, "timesteps_since_restore": 264000, "time_since_restore": 24973.92138981819, "time_this_iter_s": 80.27200627326965, "iterations_since_restore": 220}
+{"timesteps_total": 265200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 100237.639, "num_steps_sampled": 265200, "update_time_ms": 2.377, "num_steps_trained": 265200, "load_time_ms": 0.616, "default": {"kl": 0.014865408651530743, "cur_lr": 4.999999873689376e-05, "entropy": 13.083892822265625, "total_loss": 21.751710891723633, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13358891010284424, "vf_explained_var": 0.9818400144577026, "vf_loss": 21.870248794555664}, "grad_time_ms": 722.184}, "pid": 3934253, "time_total_s": 25032.295438051224, "episode_reward_mean": -154.0848343995392, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -172.94731992277121, "policy_reward_mean": {}, "episodes_total": 5304, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.7615888181636, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_21-33-46", "training_iteration": 221, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756496026, "episode_len_mean": 50.0, "timesteps_since_restore": 265200, "time_since_restore": 25032.295438051224, "time_this_iter_s": 58.37404823303223, "iterations_since_restore": 221}
+{"timesteps_total": 266400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 102143.166, "num_steps_sampled": 266400, "update_time_ms": 2.42, "num_steps_trained": 266400, "load_time_ms": 0.617, "default": {"kl": 0.014355365186929703, "cur_lr": 4.999999873689376e-05, "entropy": 13.014341354370117, "total_loss": 67.55355072021484, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13376568257808685, "vf_explained_var": 0.9518985748291016, "vf_loss": 67.67278289794922}, "grad_time_ms": 737.906}, "pid": 3934253, "time_total_s": 25155.22252869606, "episode_reward_mean": -154.03806347040836, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -174.09409334392393, "policy_reward_mean": {}, "episodes_total": 5328, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -145.41766044712392, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_21-35-49", "training_iteration": 222, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756496149, "episode_len_mean": 50.0, "timesteps_since_restore": 266400, "time_since_restore": 25155.22252869606, "time_this_iter_s": 122.92709064483643, "iterations_since_restore": 222}
+{"timesteps_total": 267600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 99729.598, "num_steps_sampled": 267600, "update_time_ms": 2.413, "num_steps_trained": 267600, "load_time_ms": 0.605, "default": {"kl": 0.015129496343433857, "cur_lr": 4.999999873689376e-05, "entropy": 13.16669750213623, "total_loss": 23.677480697631836, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13773919641971588, "vf_explained_var": 0.9821985960006714, "vf_loss": 23.79990005493164}, "grad_time_ms": 742.503}, "pid": 3934253, "time_total_s": 25263.21758890152, "episode_reward_mean": -153.84540569107764, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -174.09409334392393, "policy_reward_mean": {}, "episodes_total": 5352, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -145.41766044712392, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_21-37-37", "training_iteration": 223, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756496257, "episode_len_mean": 50.0, "timesteps_since_restore": 267600, "time_since_restore": 25263.21758890152, "time_this_iter_s": 107.9950602054596, "iterations_since_restore": 223}
+{"timesteps_total": 268800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 99750.405, "num_steps_sampled": 268800, "update_time_ms": 2.491, "num_steps_trained": 268800, "load_time_ms": 0.613, "default": {"kl": 0.014904823154211044, "cur_lr": 4.999999873689376e-05, "entropy": 13.1945161819458, "total_loss": 19.52242088317871, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14050191640853882, "vf_explained_var": 0.9843916296958923, "vf_loss": 19.647830963134766}, "grad_time_ms": 760.738}, "pid": 3934253, "time_total_s": 25357.295568943024, "episode_reward_mean": -153.5902486105291, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -175.99024313429615, "policy_reward_mean": {}, "episodes_total": 5376, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -145.11019265055916, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_21-39-11", "training_iteration": 224, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756496351, "episode_len_mean": 50.0, "timesteps_since_restore": 268800, "time_since_restore": 25357.295568943024, "time_this_iter_s": 94.0779800415039, "iterations_since_restore": 224}
+{"timesteps_total": 270000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 100893.525, "num_steps_sampled": 270000, "update_time_ms": 2.487, "num_steps_trained": 270000, "load_time_ms": 0.617, "default": {"kl": 0.015589192509651184, "cur_lr": 4.999999873689376e-05, "entropy": 12.922922134399414, "total_loss": 29.725852966308594, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13020819425582886, "vf_explained_var": 0.9754431247711182, "vf_loss": 29.84027862548828}, "grad_time_ms": 755.303}, "pid": 3934253, "time_total_s": 25469.313912391663, "episode_reward_mean": -153.95590331783544, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -175.99024313429615, "policy_reward_mean": {}, "episodes_total": 5400, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -144.9118933600018, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_21-41-03", "training_iteration": 225, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756496463, "episode_len_mean": 50.0, "timesteps_since_restore": 270000, "time_since_restore": 25469.313912391663, "time_this_iter_s": 112.01834344863892, "iterations_since_restore": 225}
+{"timesteps_total": 271200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 100926.159, "num_steps_sampled": 271200, "update_time_ms": 2.492, "num_steps_trained": 271200, "load_time_ms": 0.614, "default": {"kl": 0.01505206897854805, "cur_lr": 4.999999873689376e-05, "entropy": 12.80807876586914, "total_loss": 23.48046112060547, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13089901208877563, "vf_explained_var": 0.980962872505188, "vf_loss": 23.596118927001953}, "grad_time_ms": 754.195}, "pid": 3934253, "time_total_s": 25581.256008148193, "episode_reward_mean": -153.83885704993364, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -175.99024313429615, "policy_reward_mean": {}, "episodes_total": 5424, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -144.9118933600018, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_21-42-55", "training_iteration": 226, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756496575, "episode_len_mean": 50.0, "timesteps_since_restore": 271200, "time_since_restore": 25581.256008148193, "time_this_iter_s": 111.94209575653076, "iterations_since_restore": 226}
+{"timesteps_total": 272400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 100114.556, "num_steps_sampled": 272400, "update_time_ms": 2.515, "num_steps_trained": 272400, "load_time_ms": 0.613, "default": {"kl": 0.015062487684190273, "cur_lr": 4.999999873689376e-05, "entropy": 12.996514320373535, "total_loss": 24.47572898864746, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1414356827735901, "vf_explained_var": 0.9804410934448242, "vf_loss": 24.601913452148438}, "grad_time_ms": 755.148}, "pid": 3934253, "time_total_s": 25678.705996513367, "episode_reward_mean": -154.2308098880995, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -175.99024313429615, "policy_reward_mean": {}, "episodes_total": 5448, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -144.9118933600018, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_21-44-32", "training_iteration": 227, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756496672, "episode_len_mean": 50.0, "timesteps_since_restore": 272400, "time_since_restore": 25678.705996513367, "time_this_iter_s": 97.44998836517334, "iterations_since_restore": 227}
+{"timesteps_total": 273600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 97764.171, "num_steps_sampled": 273600, "update_time_ms": 2.513, "num_steps_trained": 273600, "load_time_ms": 0.605, "default": {"kl": 0.014948169700801373, "cur_lr": 4.999999873689376e-05, "entropy": 12.985085487365723, "total_loss": 51.06395721435547, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14840246737003326, "vf_explained_var": 0.9613332748413086, "vf_loss": 51.197227478027344}, "grad_time_ms": 767.314}, "pid": 3934253, "time_total_s": 25760.094562768936, "episode_reward_mean": -154.3562061237597, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -172.89268666728373, "policy_reward_mean": {}, "episodes_total": 5472, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -144.9118933600018, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_21-45-54", "training_iteration": 228, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756496754, "episode_len_mean": 50.0, "timesteps_since_restore": 273600, "time_since_restore": 25760.094562768936, "time_this_iter_s": 81.38856625556946, "iterations_since_restore": 228}
+{"timesteps_total": 274800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95601.535, "num_steps_sampled": 274800, "update_time_ms": 2.6, "num_steps_trained": 274800, "load_time_ms": 0.608, "default": {"kl": 0.014542028307914734, "cur_lr": 4.999999873689376e-05, "entropy": 13.030766487121582, "total_loss": 61.48118591308594, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13130980730056763, "vf_explained_var": 0.9557677507400513, "vf_loss": 61.5977783203125}, "grad_time_ms": 769.226}, "pid": 3934253, "time_total_s": 25857.44341278076, "episode_reward_mean": -155.07597284154912, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -191.1136767254141, "policy_reward_mean": {}, "episodes_total": 5496, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -148.96356347694825, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_21-47-31", "training_iteration": 229, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756496851, "episode_len_mean": 50.0, "timesteps_since_restore": 274800, "time_since_restore": 25857.44341278076, "time_this_iter_s": 97.34885001182556, "iterations_since_restore": 229}
+{"timesteps_total": 276000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 96856.808, "num_steps_sampled": 276000, "update_time_ms": 2.746, "num_steps_trained": 276000, "load_time_ms": 0.616, "default": {"kl": 0.01313636265695095, "cur_lr": 4.999999873689376e-05, "entropy": 12.705184936523438, "total_loss": 44.72980499267578, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12996193766593933, "vf_explained_var": 0.966231644153595, "vf_loss": 44.84646987915039}, "grad_time_ms": 764.339}, "pid": 3934253, "time_total_s": 25950.22126197815, "episode_reward_mean": -155.30187061257263, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -191.1136767254141, "policy_reward_mean": {}, "episodes_total": 5520, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -148.96356347694825, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_21-49-04", "training_iteration": 230, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756496944, "episode_len_mean": 50.0, "timesteps_since_restore": 276000, "time_since_restore": 25950.22126197815, "time_this_iter_s": 92.7778491973877, "iterations_since_restore": 230}
+{"timesteps_total": 277200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 101806.465, "num_steps_sampled": 277200, "update_time_ms": 2.732, "num_steps_trained": 277200, "load_time_ms": 0.619, "default": {"kl": 0.016049357131123543, "cur_lr": 4.999999873689376e-05, "entropy": 12.747896194458008, "total_loss": 39.389190673828125, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1357201188802719, "vf_explained_var": 0.9743813276290894, "vf_loss": 39.508663177490234}, "grad_time_ms": 767.366}, "pid": 3934253, "time_total_s": 26058.122532606125, "episode_reward_mean": -154.81526937116584, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -191.1136767254141, "policy_reward_mean": {}, "episodes_total": 5544, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.56158667514845, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_21-50-52", "training_iteration": 231, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756497052, "episode_len_mean": 50.0, "timesteps_since_restore": 277200, "time_since_restore": 26058.122532606125, "time_this_iter_s": 107.90127062797546, "iterations_since_restore": 231}
+{"timesteps_total": 278400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 99243.229, "num_steps_sampled": 278400, "update_time_ms": 2.714, "num_steps_trained": 278400, "load_time_ms": 0.616, "default": {"kl": 0.014159131795167923, "cur_lr": 4.999999873689376e-05, "entropy": 12.914978981018066, "total_loss": 33.23030471801758, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14277423918247223, "vf_explained_var": 0.9747536182403564, "vf_loss": 33.35874557495117}, "grad_time_ms": 758.329}, "pid": 3934253, "time_total_s": 26155.325921297073, "episode_reward_mean": -154.74883742173165, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -191.1136767254141, "policy_reward_mean": {}, "episodes_total": 5568, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -137.5857586828239, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_21-52-29", "training_iteration": 232, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756497149, "episode_len_mean": 50.0, "timesteps_since_restore": 278400, "time_since_restore": 26155.325921297073, "time_this_iter_s": 97.20338869094849, "iterations_since_restore": 232}
+{"timesteps_total": 279600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 99070.339, "num_steps_sampled": 279600, "update_time_ms": 2.659, "num_steps_trained": 279600, "load_time_ms": 0.619, "default": {"kl": 0.014392811805009842, "cur_lr": 4.999999873689376e-05, "entropy": 12.926675796508789, "total_loss": 25.39544105529785, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13040010631084442, "vf_explained_var": 0.9801141023635864, "vf_loss": 25.511268615722656}, "grad_time_ms": 766.947}, "pid": 3934253, "time_total_s": 26261.67698597908, "episode_reward_mean": -154.05910708740407, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -169.70926420317127, "policy_reward_mean": {}, "episodes_total": 5592, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -137.5857586828239, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_21-54-15", "training_iteration": 233, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756497255, "episode_len_mean": 50.0, "timesteps_since_restore": 279600, "time_since_restore": 26261.67698597908, "time_this_iter_s": 106.35106468200684, "iterations_since_restore": 233}
+{"timesteps_total": 280800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 99423.125, "num_steps_sampled": 280800, "update_time_ms": 2.548, "num_steps_trained": 280800, "load_time_ms": 0.608, "default": {"kl": 0.015516189858317375, "cur_lr": 4.999999873689376e-05, "entropy": 12.509271621704102, "total_loss": 37.31974792480469, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13313306868076324, "vf_explained_var": 0.9727855324745178, "vf_loss": 37.437171936035156}, "grad_time_ms": 756.131}, "pid": 3934253, "time_total_s": 26359.173065185547, "episode_reward_mean": -154.56187542044893, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -175.45024040060775, "policy_reward_mean": {}, "episodes_total": 5616, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -137.5857586828239, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_21-55-53", "training_iteration": 234, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756497353, "episode_len_mean": 50.0, "timesteps_since_restore": 280800, "time_since_restore": 26359.173065185547, "time_this_iter_s": 97.49607920646667, "iterations_since_restore": 234}
+{"timesteps_total": 282000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 99400.701, "num_steps_sampled": 282000, "update_time_ms": 2.538, "num_steps_trained": 282000, "load_time_ms": 0.61, "default": {"kl": 0.015087624080479145, "cur_lr": 4.999999873689376e-05, "entropy": 12.51517105102539, "total_loss": 36.4234619140625, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1278027892112732, "vf_explained_var": 0.9724928736686707, "vf_loss": 36.53599548339844}, "grad_time_ms": 757.099}, "pid": 3934253, "time_total_s": 26470.97898197174, "episode_reward_mean": -154.6107954352704, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -175.45024040060775, "policy_reward_mean": {}, "episodes_total": 5640, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -137.5857586828239, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_21-57-45", "training_iteration": 235, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756497465, "episode_len_mean": 50.0, "timesteps_since_restore": 282000, "time_since_restore": 26470.97898197174, "time_this_iter_s": 111.80591678619385, "iterations_since_restore": 235}
+{"timesteps_total": 283200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95406.385, "num_steps_sampled": 283200, "update_time_ms": 2.563, "num_steps_trained": 283200, "load_time_ms": 0.647, "default": {"kl": 0.015108389779925346, "cur_lr": 4.999999873689376e-05, "entropy": 12.653817176818848, "total_loss": 48.8950309753418, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13136011362075806, "vf_explained_var": 0.9658221006393433, "vf_loss": 49.01109313964844}, "grad_time_ms": 743.824}, "pid": 3934253, "time_total_s": 26542.84624195099, "episode_reward_mean": -154.18766838139035, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -175.45024040060775, "policy_reward_mean": {}, "episodes_total": 5664, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -147.4771196656932, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_21-58-56", "training_iteration": 236, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756497536, "episode_len_mean": 50.0, "timesteps_since_restore": 283200, "time_since_restore": 26542.84624195099, "time_this_iter_s": 71.86725997924805, "iterations_since_restore": 236}
+{"timesteps_total": 284400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94211.608, "num_steps_sampled": 284400, "update_time_ms": 2.569, "num_steps_trained": 284400, "load_time_ms": 0.657, "default": {"kl": 0.014272380620241165, "cur_lr": 4.999999873689376e-05, "entropy": 12.560246467590332, "total_loss": 31.587806701660156, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1402655392885208, "vf_explained_var": 0.9775816798210144, "vf_loss": 31.713619232177734}, "grad_time_ms": 752.183}, "pid": 3934253, "time_total_s": 26628.431704998016, "episode_reward_mean": -154.84538605775754, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -175.45024040060775, "policy_reward_mean": {}, "episodes_total": 5688, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -147.4771196656932, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-00-22", "training_iteration": 237, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756497622, "episode_len_mean": 50.0, "timesteps_since_restore": 284400, "time_since_restore": 26628.431704998016, "time_this_iter_s": 85.58546304702759, "iterations_since_restore": 237}
+{"timesteps_total": 285600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93826.904, "num_steps_sampled": 285600, "update_time_ms": 2.608, "num_steps_trained": 285600, "load_time_ms": 0.661, "default": {"kl": 0.015821723267436028, "cur_lr": 4.999999873689376e-05, "entropy": 12.670042037963867, "total_loss": 30.40340232849121, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12687571346759796, "vf_explained_var": 0.9778980612754822, "vf_loss": 30.514259338378906}, "grad_time_ms": 749.683}, "pid": 3934253, "time_total_s": 26705.948573827744, "episode_reward_mean": -154.4982256142866, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -172.66039303845443, "policy_reward_mean": {}, "episodes_total": 5712, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.12198176583468, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-01-40", "training_iteration": 238, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756497700, "episode_len_mean": 50.0, "timesteps_since_restore": 285600, "time_since_restore": 26705.948573827744, "time_this_iter_s": 77.51686882972717, "iterations_since_restore": 238}
+{"timesteps_total": 286800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94663.624, "num_steps_sampled": 286800, "update_time_ms": 2.587, "num_steps_trained": 286800, "load_time_ms": 0.671, "default": {"kl": 0.01557975821197033, "cur_lr": 4.999999873689376e-05, "entropy": 12.703690528869629, "total_loss": 32.40293502807617, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.139328271150589, "vf_explained_var": 0.9742265343666077, "vf_loss": 32.5264892578125}, "grad_time_ms": 743.241}, "pid": 3934253, "time_total_s": 26811.59946990013, "episode_reward_mean": -154.7133937321576, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -172.66039303845443, "policy_reward_mean": {}, "episodes_total": 5736, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.12198176583468, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-03-25", "training_iteration": 239, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756497805, "episode_len_mean": 50.0, "timesteps_since_restore": 286800, "time_since_restore": 26811.59946990013, "time_this_iter_s": 105.6508960723877, "iterations_since_restore": 239}
+{"timesteps_total": 288000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95221.891, "num_steps_sampled": 288000, "update_time_ms": 2.448, "num_steps_trained": 288000, "load_time_ms": 0.667, "default": {"kl": 0.015021582134068012, "cur_lr": 4.999999873689376e-05, "entropy": 12.68139362335205, "total_loss": 51.14398956298828, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12773293256759644, "vf_explained_var": 0.9638887047767639, "vf_loss": 51.25651550292969}, "grad_time_ms": 739.327}, "pid": 3934253, "time_total_s": 26909.918827056885, "episode_reward_mean": -155.13402580695703, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -174.34265858004116, "policy_reward_mean": {}, "episodes_total": 5760, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.12198176583468, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-05-04", "training_iteration": 240, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756497904, "episode_len_mean": 50.0, "timesteps_since_restore": 288000, "time_since_restore": 26909.918827056885, "time_this_iter_s": 98.31935715675354, "iterations_since_restore": 240}
+{"timesteps_total": 289200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95494.256, "num_steps_sampled": 289200, "update_time_ms": 2.609, "num_steps_trained": 289200, "load_time_ms": 0.661, "default": {"kl": 0.01462772861123085, "cur_lr": 4.999999873689376e-05, "entropy": 12.194981575012207, "total_loss": 20.053916931152344, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1390572488307953, "vf_explained_var": 0.9844868779182434, "vf_loss": 20.17816162109375}, "grad_time_ms": 731.619}, "pid": 3934253, "time_total_s": 27020.467235326767, "episode_reward_mean": -154.3404811106415, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -174.34265858004116, "policy_reward_mean": {}, "episodes_total": 5784, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.12198176583468, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-06-54", "training_iteration": 241, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756498014, "episode_len_mean": 50.0, "timesteps_since_restore": 289200, "time_since_restore": 27020.467235326767, "time_this_iter_s": 110.5484082698822, "iterations_since_restore": 241}
+{"timesteps_total": 290400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95563.006, "num_steps_sampled": 290400, "update_time_ms": 2.582, "num_steps_trained": 290400, "load_time_ms": 0.67, "default": {"kl": 0.016566181555390358, "cur_lr": 4.999999873689376e-05, "entropy": 12.55049991607666, "total_loss": 25.23848533630371, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13654492795467377, "vf_explained_var": 0.9805251359939575, "vf_loss": 25.35825538635254}, "grad_time_ms": 724.998}, "pid": 3934253, "time_total_s": 27118.29235434532, "episode_reward_mean": -154.16136676098563, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -174.34265858004116, "policy_reward_mean": {}, "episodes_total": 5808, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -148.8217025152694, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-08-32", "training_iteration": 242, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756498112, "episode_len_mean": 50.0, "timesteps_since_restore": 290400, "time_since_restore": 27118.29235434532, "time_this_iter_s": 97.82511901855469, "iterations_since_restore": 242}
+{"timesteps_total": 291600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94718.135, "num_steps_sampled": 291600, "update_time_ms": 2.636, "num_steps_trained": 291600, "load_time_ms": 0.67, "default": {"kl": 0.014852155931293964, "cur_lr": 4.999999873689376e-05, "entropy": 12.296875953674316, "total_loss": 28.0606689453125, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13112103939056396, "vf_explained_var": 0.9789355397224426, "vf_loss": 28.1767520904541}, "grad_time_ms": 734.402}, "pid": 3934253, "time_total_s": 27216.289939165115, "episode_reward_mean": -154.16419404181408, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -174.34265858004116, "policy_reward_mean": {}, "episodes_total": 5832, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -144.2930427633367, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-10-10", "training_iteration": 243, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756498210, "episode_len_mean": 50.0, "timesteps_since_restore": 291600, "time_since_restore": 27216.289939165115, "time_this_iter_s": 97.9975848197937, "iterations_since_restore": 243}
+{"timesteps_total": 292800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94072.851, "num_steps_sampled": 292800, "update_time_ms": 2.692, "num_steps_trained": 292800, "load_time_ms": 0.678, "default": {"kl": 0.01521742157638073, "cur_lr": 4.999999873689376e-05, "entropy": 12.436162948608398, "total_loss": 32.87732696533203, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1397152990102768, "vf_explained_var": 0.974628746509552, "vf_loss": 33.00163269042969}, "grad_time_ms": 729.856}, "pid": 3934253, "time_total_s": 27307.288112401962, "episode_reward_mean": -154.1744086331289, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -173.09618343276952, "policy_reward_mean": {}, "episodes_total": 5856, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -144.2930427633367, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-11-41", "training_iteration": 244, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756498301, "episode_len_mean": 50.0, "timesteps_since_restore": 292800, "time_since_restore": 27307.288112401962, "time_this_iter_s": 90.99817323684692, "iterations_since_restore": 244}
+{"timesteps_total": 294000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92770.585, "num_steps_sampled": 294000, "update_time_ms": 2.66, "num_steps_trained": 294000, "load_time_ms": 0.669, "default": {"kl": 0.015452582389116287, "cur_lr": 4.999999873689376e-05, "entropy": 12.431663513183594, "total_loss": 28.786949157714844, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12592917680740356, "vf_explained_var": 0.9775936603546143, "vf_loss": 28.89723777770996}, "grad_time_ms": 734.721}, "pid": 3934253, "time_total_s": 27406.1181910038, "episode_reward_mean": -154.34016486305367, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -173.09618343276952, "policy_reward_mean": {}, "episodes_total": 5880, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -144.2930427633367, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-13-20", "training_iteration": 245, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756498400, "episode_len_mean": 50.0, "timesteps_since_restore": 294000, "time_since_restore": 27406.1181910038, "time_this_iter_s": 98.83007860183716, "iterations_since_restore": 245}
+{"timesteps_total": 295200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95930.382, "num_steps_sampled": 295200, "update_time_ms": 2.688, "num_steps_trained": 295200, "load_time_ms": 0.632, "default": {"kl": 0.014374022372066975, "cur_lr": 4.999999873689376e-05, "entropy": 12.280024528503418, "total_loss": 37.74338912963867, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11980906873941422, "vf_explained_var": 0.9736410975456238, "vf_loss": 37.8486442565918}, "grad_time_ms": 747.384}, "pid": 3934253, "time_total_s": 27509.708899497986, "episode_reward_mean": -154.27605406898746, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -173.09618343276952, "policy_reward_mean": {}, "episodes_total": 5904, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -144.2930427633367, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-15-03", "training_iteration": 246, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756498503, "episode_len_mean": 50.0, "timesteps_since_restore": 295200, "time_since_restore": 27509.708899497986, "time_this_iter_s": 103.5907084941864, "iterations_since_restore": 246}
+{"timesteps_total": 296400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94837.908, "num_steps_sampled": 296400, "update_time_ms": 2.657, "num_steps_trained": 296400, "load_time_ms": 0.629, "default": {"kl": 0.01566668227314949, "cur_lr": 4.999999873689376e-05, "entropy": 12.17396068572998, "total_loss": 28.47240447998047, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12497733533382416, "vf_explained_var": 0.9772866368293762, "vf_loss": 28.58152198791504}, "grad_time_ms": 753.058}, "pid": 3934253, "time_total_s": 27584.426176071167, "episode_reward_mean": -154.29560239216406, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -175.1037563369774, "policy_reward_mean": {}, "episodes_total": 5928, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -151.14767500096642, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-16-18", "training_iteration": 247, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756498578, "episode_len_mean": 50.0, "timesteps_since_restore": 296400, "time_since_restore": 27584.426176071167, "time_this_iter_s": 74.71727657318115, "iterations_since_restore": 247}
+{"timesteps_total": 297600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 96071.038, "num_steps_sampled": 297600, "update_time_ms": 2.61, "num_steps_trained": 297600, "load_time_ms": 0.63, "default": {"kl": 0.013279435224831104, "cur_lr": 4.999999873689376e-05, "entropy": 12.279629707336426, "total_loss": 42.234100341796875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12782859802246094, "vf_explained_var": 0.9741078615188599, "vf_loss": 42.34848403930664}, "grad_time_ms": 753.748}, "pid": 3934253, "time_total_s": 27674.280586481094, "episode_reward_mean": -153.9695664898226, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -175.1037563369774, "policy_reward_mean": {}, "episodes_total": 5952, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -149.32841745117312, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-17-48", "training_iteration": 248, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756498668, "episode_len_mean": 50.0, "timesteps_since_restore": 297600, "time_since_restore": 27674.280586481094, "time_this_iter_s": 89.85441040992737, "iterations_since_restore": 248}
+{"timesteps_total": 298800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 97574.462, "num_steps_sampled": 298800, "update_time_ms": 2.616, "num_steps_trained": 298800, "load_time_ms": 0.651, "default": {"kl": 0.01410535629838705, "cur_lr": 4.999999873689376e-05, "entropy": 12.100536346435547, "total_loss": 19.91636085510254, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12572787702083588, "vf_explained_var": 0.9843169450759888, "vf_loss": 20.027809143066406}, "grad_time_ms": 758.636}, "pid": 3934253, "time_total_s": 27795.013806581497, "episode_reward_mean": -153.71891833490415, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -175.1037563369774, "policy_reward_mean": {}, "episodes_total": 5976, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -149.32841745117312, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-19-49", "training_iteration": 249, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756498789, "episode_len_mean": 50.0, "timesteps_since_restore": 298800, "time_since_restore": 27795.013806581497, "time_this_iter_s": 120.73322010040283, "iterations_since_restore": 249}
+{"timesteps_total": 300000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 97666.622, "num_steps_sampled": 300000, "update_time_ms": 2.658, "num_steps_trained": 300000, "load_time_ms": 0.651, "default": {"kl": 0.012680845335125923, "cur_lr": 4.999999873689376e-05, "entropy": 12.221396446228027, "total_loss": 41.20733642578125, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11321382969617844, "vf_explained_var": 0.9742316007614136, "vf_loss": 41.307708740234375}, "grad_time_ms": 763.965}, "pid": 3934253, "time_total_s": 27894.30849289894, "episode_reward_mean": -154.03355792080626, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -201.81562551481366, "policy_reward_mean": {}, "episodes_total": 6000, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -149.32841745117312, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-21-28", "training_iteration": 250, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756498888, "episode_len_mean": 50.0, "timesteps_since_restore": 300000, "time_since_restore": 27894.30849289894, "time_this_iter_s": 99.29468631744385, "iterations_since_restore": 250}
+{"timesteps_total": 301200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94007.579, "num_steps_sampled": 301200, "update_time_ms": 2.523, "num_steps_trained": 301200, "load_time_ms": 0.647, "default": {"kl": 0.013563835062086582, "cur_lr": 4.999999873689376e-05, "entropy": 11.968669891357422, "total_loss": 31.392887115478516, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11377114802598953, "vf_explained_var": 0.9757980704307556, "vf_loss": 31.492923736572266}, "grad_time_ms": 744.0}, "pid": 3934253, "time_total_s": 27968.066175222397, "episode_reward_mean": -153.65693731382558, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -201.81562551481366, "policy_reward_mean": {}, "episodes_total": 6024, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -149.32841745117312, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-22-42", "training_iteration": 251, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756498962, "episode_len_mean": 50.0, "timesteps_since_restore": 301200, "time_since_restore": 27968.066175222397, "time_this_iter_s": 73.75768232345581, "iterations_since_restore": 251}
+{"timesteps_total": 302400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92510.76, "num_steps_sampled": 302400, "update_time_ms": 2.579, "num_steps_trained": 302400, "load_time_ms": 0.648, "default": {"kl": 0.013669435866177082, "cur_lr": 4.999999873689376e-05, "entropy": 12.033707618713379, "total_loss": 17.814746856689453, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11148720234632492, "vf_explained_var": 0.9851120710372925, "vf_loss": 17.912391662597656}, "grad_time_ms": 751.818}, "pid": 3934253, "time_total_s": 28051.0013692379, "episode_reward_mean": -153.92671987302916, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -201.81562551481366, "policy_reward_mean": {}, "episodes_total": 6048, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -149.90883747438755, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-24-05", "training_iteration": 252, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756499045, "episode_len_mean": 50.0, "timesteps_since_restore": 302400, "time_since_restore": 28051.0013692379, "time_this_iter_s": 82.93519401550293, "iterations_since_restore": 252}
+{"timesteps_total": 303600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92284.865, "num_steps_sampled": 303600, "update_time_ms": 2.535, "num_steps_trained": 303600, "load_time_ms": 0.657, "default": {"kl": 0.01406802423298359, "cur_lr": 4.999999873689376e-05, "entropy": 12.27514362335205, "total_loss": 20.965513229370117, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1320834904909134, "vf_explained_var": 0.9830424189567566, "vf_loss": 21.083351135253906}, "grad_time_ms": 750.709}, "pid": 3934253, "time_total_s": 28146.728314638138, "episode_reward_mean": -153.97191238060424, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -201.81562551481366, "policy_reward_mean": {}, "episodes_total": 6072, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.37306239201038, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-25-41", "training_iteration": 253, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756499141, "episode_len_mean": 50.0, "timesteps_since_restore": 303600, "time_since_restore": 28146.728314638138, "time_this_iter_s": 95.72694540023804, "iterations_since_restore": 253}
+{"timesteps_total": 304800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 91604.615, "num_steps_sampled": 304800, "update_time_ms": 2.51, "num_steps_trained": 304800, "load_time_ms": 0.665, "default": {"kl": 0.014106563292443752, "cur_lr": 4.999999873689376e-05, "entropy": 12.195647239685059, "total_loss": 38.15617752075195, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1277208924293518, "vf_explained_var": 0.9729253053665161, "vf_loss": 38.269615173339844}, "grad_time_ms": 756.57}, "pid": 3934253, "time_total_s": 28230.983020067215, "episode_reward_mean": -153.40373628066334, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -175.32770252462922, "policy_reward_mean": {}, "episodes_total": 6096, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.37306239201038, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-27-05", "training_iteration": 254, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756499225, "episode_len_mean": 50.0, "timesteps_since_restore": 304800, "time_since_restore": 28230.983020067215, "time_this_iter_s": 84.25470542907715, "iterations_since_restore": 254}
+{"timesteps_total": 306000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 90304.885, "num_steps_sampled": 306000, "update_time_ms": 2.523, "num_steps_trained": 306000, "load_time_ms": 0.669, "default": {"kl": 0.014886324293911457, "cur_lr": 4.999999873689376e-05, "entropy": 12.2487211227417, "total_loss": 19.486772537231445, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13814154267311096, "vf_explained_var": 0.984747052192688, "vf_loss": 19.609840393066406}, "grad_time_ms": 749.583}, "pid": 3934253, "time_total_s": 28316.745934963226, "episode_reward_mean": -153.5950026973953, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -175.32770252462922, "policy_reward_mean": {}, "episodes_total": 6120, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.37306239201038, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-28-31", "training_iteration": 255, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756499311, "episode_len_mean": 50.0, "timesteps_since_restore": 306000, "time_since_restore": 28316.745934963226, "time_this_iter_s": 85.76291489601135, "iterations_since_restore": 255}
+{"timesteps_total": 307200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 87739.019, "num_steps_sampled": 307200, "update_time_ms": 2.509, "num_steps_trained": 307200, "load_time_ms": 0.67, "default": {"kl": 0.01419132947921753, "cur_lr": 4.999999873689376e-05, "entropy": 12.179950714111328, "total_loss": 44.02378845214844, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12743514776229858, "vf_explained_var": 0.9703550338745117, "vf_loss": 44.1368522644043}, "grad_time_ms": 743.555}, "pid": 3934253, "time_total_s": 28394.618319272995, "episode_reward_mean": -153.71739596982954, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -175.32770252462922, "policy_reward_mean": {}, "episodes_total": 6144, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.37306239201038, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-29-49", "training_iteration": 256, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756499389, "episode_len_mean": 50.0, "timesteps_since_restore": 307200, "time_since_restore": 28394.618319272995, "time_this_iter_s": 77.87238430976868, "iterations_since_restore": 256}
+{"timesteps_total": 308400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 89687.698, "num_steps_sampled": 308400, "update_time_ms": 2.549, "num_steps_trained": 308400, "load_time_ms": 0.664, "default": {"kl": 0.014530722051858902, "cur_lr": 4.999999873689376e-05, "entropy": 12.120244026184082, "total_loss": 26.150606155395508, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.125518798828125, "vf_explained_var": 0.9807274341583252, "vf_loss": 26.261411666870117}, "grad_time_ms": 747.631}, "pid": 3934253, "time_total_s": 28488.863465070724, "episode_reward_mean": -153.58796723004997, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -175.32770252462922, "policy_reward_mean": {}, "episodes_total": 6168, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.45347079017628, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-31-23", "training_iteration": 257, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756499483, "episode_len_mean": 50.0, "timesteps_since_restore": 308400, "time_since_restore": 28488.863465070724, "time_this_iter_s": 94.24514579772949, "iterations_since_restore": 257}
+{"timesteps_total": 309600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 89355.613, "num_steps_sampled": 309600, "update_time_ms": 2.538, "num_steps_trained": 309600, "load_time_ms": 0.658, "default": {"kl": 0.014274870045483112, "cur_lr": 4.999999873689376e-05, "entropy": 12.029433250427246, "total_loss": 21.19289779663086, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14656893908977509, "vf_explained_var": 0.9849755764007568, "vf_loss": 21.325014114379883}, "grad_time_ms": 743.267}, "pid": 3934253, "time_total_s": 28575.35304093361, "episode_reward_mean": -153.65874506459247, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.30001100256214, "policy_reward_mean": {}, "episodes_total": 6192, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.45347079017628, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-32-49", "training_iteration": 258, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756499569, "episode_len_mean": 50.0, "timesteps_since_restore": 309600, "time_since_restore": 28575.35304093361, "time_this_iter_s": 86.48957586288452, "iterations_since_restore": 258}
+{"timesteps_total": 310800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 87597.555, "num_steps_sampled": 310800, "update_time_ms": 2.485, "num_steps_trained": 310800, "load_time_ms": 0.628, "default": {"kl": 0.01563265360891819, "cur_lr": 4.999999873689376e-05, "entropy": 12.04366397857666, "total_loss": 24.3173885345459, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13971100747585297, "vf_explained_var": 0.9832693934440613, "vf_loss": 24.441268920898438}, "grad_time_ms": 740.667}, "pid": 3934253, "time_total_s": 28678.47874569893, "episode_reward_mean": -153.58450695244758, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.7332204385451, "policy_reward_mean": {}, "episodes_total": 6216, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.45347079017628, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-34-32", "training_iteration": 259, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756499672, "episode_len_mean": 50.0, "timesteps_since_restore": 310800, "time_since_restore": 28678.47874569893, "time_this_iter_s": 103.12570476531982, "iterations_since_restore": 259}
+{"timesteps_total": 312000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 87057.199, "num_steps_sampled": 312000, "update_time_ms": 2.422, "num_steps_trained": 312000, "load_time_ms": 0.632, "default": {"kl": 0.013417969457805157, "cur_lr": 4.999999873689376e-05, "entropy": 12.221627235412598, "total_loss": 37.70539474487305, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12108760327100754, "vf_explained_var": 0.9734055995941162, "vf_loss": 37.81289291381836}, "grad_time_ms": 740.404}, "pid": 3934253, "time_total_s": 28772.3668551445, "episode_reward_mean": -153.13736615788864, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -170.00570466065776, "policy_reward_mean": {}, "episodes_total": 6240, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -140.7502885744889, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-36-06", "training_iteration": 260, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756499766, "episode_len_mean": 50.0, "timesteps_since_restore": 312000, "time_since_restore": 28772.3668551445, "time_this_iter_s": 93.8881094455719, "iterations_since_restore": 260}
+{"timesteps_total": 313200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 89045.252, "num_steps_sampled": 313200, "update_time_ms": 2.386, "num_steps_trained": 313200, "load_time_ms": 0.631, "default": {"kl": 0.012220478616654873, "cur_lr": 4.999999873689376e-05, "entropy": 11.781728744506836, "total_loss": 74.40132904052734, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11190219968557358, "vf_explained_var": 0.9513096213340759, "vf_loss": 74.50086212158203}, "grad_time_ms": 762.39}, "pid": 3934253, "time_total_s": 28866.223863124847, "episode_reward_mean": -153.94081905848125, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -195.83850086707832, "policy_reward_mean": {}, "episodes_total": 6264, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -140.7502885744889, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-37-40", "training_iteration": 261, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756499860, "episode_len_mean": 50.0, "timesteps_since_restore": 313200, "time_since_restore": 28866.223863124847, "time_this_iter_s": 93.85700798034668, "iterations_since_restore": 261}
+{"timesteps_total": 314400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 89071.642, "num_steps_sampled": 314400, "update_time_ms": 2.394, "num_steps_trained": 314400, "load_time_ms": 0.625, "default": {"kl": 0.014541917480528355, "cur_lr": 4.999999873689376e-05, "entropy": 11.741612434387207, "total_loss": 17.651187896728516, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13121682405471802, "vf_explained_var": 0.9867935180664062, "vf_loss": 17.76767921447754}, "grad_time_ms": 775.037}, "pid": 3934253, "time_total_s": 28949.54998254776, "episode_reward_mean": -153.74686526915812, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -195.83850086707832, "policy_reward_mean": {}, "episodes_total": 6288, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -140.7502885744889, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-39-04", "training_iteration": 262, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756499944, "episode_len_mean": 50.0, "timesteps_since_restore": 314400, "time_since_restore": 28949.54998254776, "time_this_iter_s": 83.3261194229126, "iterations_since_restore": 262}
+{"timesteps_total": 315600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 88088.332, "num_steps_sampled": 315600, "update_time_ms": 2.525, "num_steps_trained": 315600, "load_time_ms": 0.613, "default": {"kl": 0.013870678842067719, "cur_lr": 4.999999873689376e-05, "entropy": 11.782343864440918, "total_loss": 18.725406646728516, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12153424322605133, "vf_explained_var": 0.9850756525993347, "vf_loss": 18.832895278930664}, "grad_time_ms": 772.561}, "pid": 3934253, "time_total_s": 29035.42023253441, "episode_reward_mean": -153.9387682014451, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -195.83850086707832, "policy_reward_mean": {}, "episodes_total": 6312, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -140.7502885744889, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-40-30", "training_iteration": 263, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756500030, "episode_len_mean": 50.0, "timesteps_since_restore": 315600, "time_since_restore": 29035.42023253441, "time_this_iter_s": 85.87024998664856, "iterations_since_restore": 263}
+{"timesteps_total": 316800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 88319.086, "num_steps_sampled": 316800, "update_time_ms": 2.537, "num_steps_trained": 316800, "load_time_ms": 0.604, "default": {"kl": 0.015589484013617039, "cur_lr": 4.999999873689376e-05, "entropy": 11.815381050109863, "total_loss": 45.58415222167969, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13468068838119507, "vf_explained_var": 0.9687883853912354, "vf_loss": 45.70304870605469}, "grad_time_ms": 773.986}, "pid": 3934253, "time_total_s": 29121.998387098312, "episode_reward_mean": -154.28611412240772, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -195.83850086707832, "policy_reward_mean": {}, "episodes_total": 6336, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -149.05647309909892, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-41-56", "training_iteration": 264, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756500116, "episode_len_mean": 50.0, "timesteps_since_restore": 316800, "time_since_restore": 29121.998387098312, "time_this_iter_s": 86.57815456390381, "iterations_since_restore": 264}
+{"timesteps_total": 318000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 88821.475, "num_steps_sampled": 318000, "update_time_ms": 2.55, "num_steps_trained": 318000, "load_time_ms": 0.607, "default": {"kl": 0.015006310306489468, "cur_lr": 4.999999873689376e-05, "entropy": 11.8653564453125, "total_loss": 29.010637283325195, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13054674863815308, "vf_explained_var": 0.9786375761032104, "vf_loss": 29.125986099243164}, "grad_time_ms": 780.571}, "pid": 3934253, "time_total_s": 29212.850786685944, "episode_reward_mean": -154.1596053466124, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -176.4381663197646, "policy_reward_mean": {}, "episodes_total": 6360, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -148.19820052487748, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-43-27", "training_iteration": 265, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756500207, "episode_len_mean": 50.0, "timesteps_since_restore": 318000, "time_since_restore": 29212.850786685944, "time_this_iter_s": 90.85239958763123, "iterations_since_restore": 265}
+{"timesteps_total": 319200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 88283.819, "num_steps_sampled": 319200, "update_time_ms": 2.533, "num_steps_trained": 319200, "load_time_ms": 0.611, "default": {"kl": 0.016107451170682907, "cur_lr": 4.999999873689376e-05, "entropy": 12.022677421569824, "total_loss": 37.852230072021484, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1311512589454651, "vf_explained_var": 0.974249541759491, "vf_loss": 37.967071533203125}, "grad_time_ms": 794.547}, "pid": 3934253, "time_total_s": 29285.48614835739, "episode_reward_mean": -154.15685653819614, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -176.4381663197646, "policy_reward_mean": {}, "episodes_total": 6384, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -136.70630152775394, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-44-40", "training_iteration": 266, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756500280, "episode_len_mean": 50.0, "timesteps_since_restore": 319200, "time_since_restore": 29285.48614835739, "time_this_iter_s": 72.63536167144775, "iterations_since_restore": 266}
+{"timesteps_total": 320400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 88743.608, "num_steps_sampled": 320400, "update_time_ms": 2.52, "num_steps_trained": 320400, "load_time_ms": 0.61, "default": {"kl": 0.014153753407299519, "cur_lr": 4.999999873689376e-05, "entropy": 11.821681022644043, "total_loss": 27.39217758178711, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12194425612688065, "vf_explained_var": 0.9820153713226318, "vf_loss": 27.499794006347656}, "grad_time_ms": 793.183}, "pid": 3934253, "time_total_s": 29384.31538414955, "episode_reward_mean": -153.96803814397418, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -176.4381663197646, "policy_reward_mean": {}, "episodes_total": 6408, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -136.70630152775394, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-46-18", "training_iteration": 267, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756500378, "episode_len_mean": 50.0, "timesteps_since_restore": 320400, "time_since_restore": 29384.31538414955, "time_this_iter_s": 98.82923579216003, "iterations_since_restore": 267}
+{"timesteps_total": 321600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 87746.756, "num_steps_sampled": 321600, "update_time_ms": 2.571, "num_steps_trained": 321600, "load_time_ms": 0.612, "default": {"kl": 0.013911773450672626, "cur_lr": 4.999999873689376e-05, "entropy": 11.718981742858887, "total_loss": 40.44329071044922, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13097091019153595, "vf_explained_var": 0.9710770845413208, "vf_loss": 40.560176849365234}, "grad_time_ms": 797.005}, "pid": 3934253, "time_total_s": 29460.875306606293, "episode_reward_mean": -153.7755560748365, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -176.4381663197646, "policy_reward_mean": {}, "episodes_total": 6432, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -136.70630152775394, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-47-35", "training_iteration": 268, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756500455, "episode_len_mean": 50.0, "timesteps_since_restore": 321600, "time_since_restore": 29460.875306606293, "time_this_iter_s": 76.55992245674133, "iterations_since_restore": 268}
+{"timesteps_total": 322800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 84188.524, "num_steps_sampled": 322800, "update_time_ms": 2.593, "num_steps_trained": 322800, "load_time_ms": 0.607, "default": {"kl": 0.016193203628063202, "cur_lr": 4.999999873689376e-05, "entropy": 11.588141441345215, "total_loss": 32.77817153930664, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13464587926864624, "vf_explained_var": 0.9762402772903442, "vf_loss": 32.896419525146484}, "grad_time_ms": 806.453}, "pid": 3934253, "time_total_s": 29528.51364827156, "episode_reward_mean": -153.4607327425086, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -171.40863771827642, "policy_reward_mean": {}, "episodes_total": 6456, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -136.70630152775394, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-48-43", "training_iteration": 269, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756500523, "episode_len_mean": 50.0, "timesteps_since_restore": 322800, "time_since_restore": 29528.51364827156, "time_this_iter_s": 67.63834166526794, "iterations_since_restore": 269}
+{"timesteps_total": 324000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 84408.661, "num_steps_sampled": 324000, "update_time_ms": 2.626, "num_steps_trained": 324000, "load_time_ms": 0.596, "default": {"kl": 0.01516958698630333, "cur_lr": 4.999999873689376e-05, "entropy": 11.557772636413574, "total_loss": 23.42417335510254, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12869912385940552, "vf_explained_var": 0.9817376732826233, "vf_loss": 23.537513732910156}, "grad_time_ms": 806.745}, "pid": 3934253, "time_total_s": 29624.60574412346, "episode_reward_mean": -153.33992347144647, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -171.40863771827642, "policy_reward_mean": {}, "episodes_total": 6480, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.72273321439698, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-50-19", "training_iteration": 270, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756500619, "episode_len_mean": 50.0, "timesteps_since_restore": 324000, "time_since_restore": 29624.60574412346, "time_this_iter_s": 96.0920958518982, "iterations_since_restore": 270}
+{"timesteps_total": 325200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 81621.057, "num_steps_sampled": 325200, "update_time_ms": 2.665, "num_steps_trained": 325200, "load_time_ms": 0.606, "default": {"kl": 0.0140716303139925, "cur_lr": 4.999999873689376e-05, "entropy": 11.617931365966797, "total_loss": 23.010072708129883, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1258062869310379, "vf_explained_var": 0.9814040660858154, "vf_loss": 23.121633529663086}, "grad_time_ms": 807.68}, "pid": 3934253, "time_total_s": 29690.5972969532, "episode_reward_mean": -153.17287745568458, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -171.40863771827642, "policy_reward_mean": {}, "episodes_total": 6504, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.72273321439698, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-51-25", "training_iteration": 271, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756500685, "episode_len_mean": 50.0, "timesteps_since_restore": 325200, "time_since_restore": 29690.5972969532, "time_this_iter_s": 65.99155282974243, "iterations_since_restore": 271}
+{"timesteps_total": 326400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 82244.568, "num_steps_sampled": 326400, "update_time_ms": 2.656, "num_steps_trained": 326400, "load_time_ms": 0.61, "default": {"kl": 0.013999907299876213, "cur_lr": 4.999999873689376e-05, "entropy": 11.388032913208008, "total_loss": 18.105144500732422, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11585116386413574, "vf_explained_var": 0.9850890040397644, "vf_loss": 18.20682144165039}, "grad_time_ms": 788.997}, "pid": 3934253, "time_total_s": 29779.971660375595, "episode_reward_mean": -152.95437416580322, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.7981294945134, "policy_reward_mean": {}, "episodes_total": 6528, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.31050554669037, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-52-54", "training_iteration": 272, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756500774, "episode_len_mean": 50.0, "timesteps_since_restore": 326400, "time_since_restore": 29779.971660375595, "time_this_iter_s": 89.3743634223938, "iterations_since_restore": 272}
+{"timesteps_total": 327600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 80115.644, "num_steps_sampled": 327600, "update_time_ms": 2.58, "num_steps_trained": 327600, "load_time_ms": 0.618, "default": {"kl": 0.014399628154933453, "cur_lr": 4.999999873689376e-05, "entropy": 11.7221097946167, "total_loss": 35.62514877319336, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12929335236549377, "vf_explained_var": 0.9736959934234619, "vf_loss": 35.739864349365234}, "grad_time_ms": 793.414}, "pid": 3934253, "time_total_s": 29844.59642982483, "episode_reward_mean": -153.57078695792043, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -180.0083391494624, "policy_reward_mean": {}, "episodes_total": 6552, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.31050554669037, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-53-59", "training_iteration": 273, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756500839, "episode_len_mean": 50.0, "timesteps_since_restore": 327600, "time_since_restore": 29844.59642982483, "time_this_iter_s": 64.62476944923401, "iterations_since_restore": 273}
+{"timesteps_total": 328800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 81093.68, "num_steps_sampled": 328800, "update_time_ms": 2.564, "num_steps_trained": 328800, "load_time_ms": 0.614, "default": {"kl": 0.013647317886352539, "cur_lr": 4.999999873689376e-05, "entropy": 11.217588424682617, "total_loss": 42.10578918457031, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10328339040279388, "vf_explained_var": 0.9690133333206177, "vf_loss": 42.19525909423828}, "grad_time_ms": 794.475}, "pid": 3934253, "time_total_s": 29940.964215040207, "episode_reward_mean": -153.39609191716633, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -180.0083391494624, "policy_reward_mean": {}, "episodes_total": 6576, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.31050554669037, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-55-35", "training_iteration": 274, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756500935, "episode_len_mean": 50.0, "timesteps_since_restore": 328800, "time_since_restore": 29940.964215040207, "time_this_iter_s": 96.36778521537781, "iterations_since_restore": 274}
+{"timesteps_total": 330000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 82563.125, "num_steps_sampled": 330000, "update_time_ms": 2.6, "num_steps_trained": 330000, "load_time_ms": 0.606, "default": {"kl": 0.014787460677325726, "cur_lr": 4.999999873689376e-05, "entropy": 11.442232131958008, "total_loss": 25.26143455505371, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11275593191385269, "vf_explained_var": 0.9793703556060791, "vf_loss": 25.359216690063477}, "grad_time_ms": 794.462}, "pid": 3934253, "time_total_s": 30046.511551856995, "episode_reward_mean": -153.3767483996174, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -180.0083391494624, "policy_reward_mean": {}, "episodes_total": 6600, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.31050554669037, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-57-21", "training_iteration": 275, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756501041, "episode_len_mean": 50.0, "timesteps_since_restore": 330000, "time_since_restore": 30046.511551856995, "time_this_iter_s": 105.54733681678772, "iterations_since_restore": 275}
+{"timesteps_total": 331200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 85483.028, "num_steps_sampled": 331200, "update_time_ms": 2.634, "num_steps_trained": 331200, "load_time_ms": 0.6, "default": {"kl": 0.015072625130414963, "cur_lr": 4.999999873689376e-05, "entropy": 11.500537872314453, "total_loss": 21.24437141418457, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13162878155708313, "vf_explained_var": 0.9849632978439331, "vf_loss": 21.36073875427246}, "grad_time_ms": 778.258}, "pid": 3934253, "time_total_s": 30148.183248519897, "episode_reward_mean": -153.38630465945496, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -180.0083391494624, "policy_reward_mean": {}, "episodes_total": 6624, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -145.2196053826522, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_22-59-02", "training_iteration": 276, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756501142, "episode_len_mean": 50.0, "timesteps_since_restore": 331200, "time_since_restore": 30148.183248519897, "time_this_iter_s": 101.67169666290283, "iterations_since_restore": 276}
+{"timesteps_total": 332400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 84406.381, "num_steps_sampled": 332400, "update_time_ms": 2.596, "num_steps_trained": 332400, "load_time_ms": 0.6, "default": {"kl": 0.014994761906564236, "cur_lr": 4.999999873689376e-05, "entropy": 11.437612533569336, "total_loss": 18.316537857055664, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12036796659231186, "vf_explained_var": 0.9855210781097412, "vf_loss": 18.421722412109375}, "grad_time_ms": 761.188}, "pid": 3934253, "time_total_s": 30236.075475215912, "episode_reward_mean": -152.83274076860297, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.82521908204325, "policy_reward_mean": {}, "episodes_total": 6648, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -145.2196053826522, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-00-30", "training_iteration": 277, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756501230, "episode_len_mean": 50.0, "timesteps_since_restore": 332400, "time_since_restore": 30236.075475215912, "time_this_iter_s": 87.8922266960144, "iterations_since_restore": 277}
+{"timesteps_total": 333600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 85115.306, "num_steps_sampled": 333600, "update_time_ms": 2.554, "num_steps_trained": 333600, "load_time_ms": 0.604, "default": {"kl": 0.013223753310739994, "cur_lr": 4.999999873689376e-05, "entropy": 11.365351676940918, "total_loss": 25.293102264404297, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12207407504320145, "vf_explained_var": 0.9820523858070374, "vf_loss": 25.40178871154785}, "grad_time_ms": 757.229}, "pid": 3934253, "time_total_s": 30319.68391394615, "episode_reward_mean": -152.7504248056896, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.82521908204325, "policy_reward_mean": {}, "episodes_total": 6672, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -145.2196053826522, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-01-54", "training_iteration": 278, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756501314, "episode_len_mean": 50.0, "timesteps_since_restore": 333600, "time_since_restore": 30319.68391394615, "time_this_iter_s": 83.60843873023987, "iterations_since_restore": 278}
+{"timesteps_total": 334800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 85352.692, "num_steps_sampled": 334800, "update_time_ms": 2.507, "num_steps_trained": 334800, "load_time_ms": 0.601, "default": {"kl": 0.010769886896014214, "cur_lr": 4.999999873689376e-05, "entropy": 11.56566333770752, "total_loss": 98.25940704345703, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10573761910200119, "vf_explained_var": 0.9470511674880981, "vf_loss": 98.354248046875}, "grad_time_ms": 751.919}, "pid": 3934253, "time_total_s": 30389.643027305603, "episode_reward_mean": -153.30131133278667, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -208.3227003464183, "policy_reward_mean": {}, "episodes_total": 6696, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -145.2196053826522, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-03-04", "training_iteration": 279, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756501384, "episode_len_mean": 50.0, "timesteps_since_restore": 334800, "time_since_restore": 30389.643027305603, "time_this_iter_s": 69.9591133594513, "iterations_since_restore": 279}
+{"timesteps_total": 336000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 85807.115, "num_steps_sampled": 336000, "update_time_ms": 2.518, "num_steps_trained": 336000, "load_time_ms": 0.604, "default": {"kl": 0.015613911673426628, "cur_lr": 4.999999873689376e-05, "entropy": 11.319066047668457, "total_loss": 13.783968925476074, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12649664282798767, "vf_explained_var": 0.9884146451950073, "vf_loss": 13.89465618133545}, "grad_time_ms": 742.985}, "pid": 3934253, "time_total_s": 30490.19049167633, "episode_reward_mean": -153.73297503235312, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -208.3227003464183, "policy_reward_mean": {}, "episodes_total": 6720, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -145.24336047937695, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-04-44", "training_iteration": 280, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756501484, "episode_len_mean": 50.0, "timesteps_since_restore": 336000, "time_since_restore": 30490.19049167633, "time_this_iter_s": 100.54746437072754, "iterations_since_restore": 280}
+{"timesteps_total": 337200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 89366.171, "num_steps_sampled": 337200, "update_time_ms": 2.458, "num_steps_trained": 337200, "load_time_ms": 0.598, "default": {"kl": 0.01393085066229105, "cur_lr": 4.999999873689376e-05, "entropy": 11.271801948547363, "total_loss": 63.68611526489258, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12504935264587402, "vf_explained_var": 0.9535910487174988, "vf_loss": 63.79706573486328}, "grad_time_ms": 735.058}, "pid": 3934253, "time_total_s": 30591.69241476059, "episode_reward_mean": -154.0611767016651, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -208.3227003464183, "policy_reward_mean": {}, "episodes_total": 6744, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -145.24336047937695, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-06-26", "training_iteration": 281, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756501586, "episode_len_mean": 50.0, "timesteps_since_restore": 337200, "time_since_restore": 30591.69241476059, "time_this_iter_s": 101.50192308425903, "iterations_since_restore": 281}
+{"timesteps_total": 338400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 89531.295, "num_steps_sampled": 338400, "update_time_ms": 2.404, "num_steps_trained": 338400, "load_time_ms": 0.6, "default": {"kl": 0.01404589880257845, "cur_lr": 4.999999873689376e-05, "entropy": 11.205625534057617, "total_loss": 21.548248291015625, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12210464477539062, "vf_explained_var": 0.9836018681526184, "vf_loss": 21.656131744384766}, "grad_time_ms": 748.553}, "pid": 3934253, "time_total_s": 30682.852532863617, "episode_reward_mean": -154.06282255443577, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -208.3227003464183, "policy_reward_mean": {}, "episodes_total": 6768, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -146.79730571525536, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-07-57", "training_iteration": 282, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756501677, "episode_len_mean": 50.0, "timesteps_since_restore": 338400, "time_since_restore": 30682.852532863617, "time_this_iter_s": 91.16011810302734, "iterations_since_restore": 282}
+{"timesteps_total": 339600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93866.94, "num_steps_sampled": 339600, "update_time_ms": 2.363, "num_steps_trained": 339600, "load_time_ms": 0.602, "default": {"kl": 0.013868219219148159, "cur_lr": 4.999999873689376e-05, "entropy": 11.38871955871582, "total_loss": 22.872215270996094, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12734149396419525, "vf_explained_var": 0.9838337302207947, "vf_loss": 22.985517501831055}, "grad_time_ms": 741.84}, "pid": 3934253, "time_total_s": 30790.766562223434, "episode_reward_mean": -153.490601175543, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -186.98396846066603, "policy_reward_mean": {}, "episodes_total": 6792, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -146.79730571525536, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-09-45", "training_iteration": 283, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756501785, "episode_len_mean": 50.0, "timesteps_since_restore": 339600, "time_since_restore": 30790.766562223434, "time_this_iter_s": 107.9140293598175, "iterations_since_restore": 283}
+{"timesteps_total": 340800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92716.905, "num_steps_sampled": 340800, "update_time_ms": 2.316, "num_steps_trained": 340800, "load_time_ms": 0.606, "default": {"kl": 0.01389007456600666, "cur_lr": 4.999999873689376e-05, "entropy": 11.565324783325195, "total_loss": 53.0439453125, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12358132749795914, "vf_explained_var": 0.9608864188194275, "vf_loss": 53.15346145629883}, "grad_time_ms": 743.247}, "pid": 3934253, "time_total_s": 30875.64744758606, "episode_reward_mean": -153.8480949080955, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -186.98396846066603, "policy_reward_mean": {}, "episodes_total": 6816, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -146.79730571525536, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-11-10", "training_iteration": 284, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756501870, "episode_len_mean": 50.0, "timesteps_since_restore": 340800, "time_since_restore": 30875.64744758606, "time_this_iter_s": 84.88088536262512, "iterations_since_restore": 284}
+{"timesteps_total": 342000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 90635.975, "num_steps_sampled": 342000, "update_time_ms": 2.262, "num_steps_trained": 342000, "load_time_ms": 0.611, "default": {"kl": 0.014106114394962788, "cur_lr": 4.999999873689376e-05, "entropy": 11.181747436523438, "total_loss": 36.09983825683594, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11997734010219574, "vf_explained_var": 0.9704306125640869, "vf_loss": 36.20553207397461}, "grad_time_ms": 743.898}, "pid": 3934253, "time_total_s": 30960.3914706707, "episode_reward_mean": -153.69546458851175, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -184.65606171714566, "policy_reward_mean": {}, "episodes_total": 6840, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -136.9307972088323, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-12-35", "training_iteration": 285, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756501955, "episode_len_mean": 50.0, "timesteps_since_restore": 342000, "time_since_restore": 30960.3914706707, "time_this_iter_s": 84.7440230846405, "iterations_since_restore": 285}
+{"timesteps_total": 343200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 90821.993, "num_steps_sampled": 343200, "update_time_ms": 2.237, "num_steps_trained": 343200, "load_time_ms": 0.614, "default": {"kl": 0.014162329956889153, "cur_lr": 4.999999873689376e-05, "entropy": 11.337715148925781, "total_loss": 29.2408447265625, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13232357800006866, "vf_explained_var": 0.9764517545700073, "vf_loss": 29.35883331298828}, "grad_time_ms": 762.872}, "pid": 3934253, "time_total_s": 31064.113805532455, "episode_reward_mean": -153.9291175871248, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -184.65606171714566, "policy_reward_mean": {}, "episodes_total": 6864, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -136.9307972088323, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-14-19", "training_iteration": 286, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756502059, "episode_len_mean": 50.0, "timesteps_since_restore": 343200, "time_since_restore": 31064.113805532455, "time_this_iter_s": 103.72233486175537, "iterations_since_restore": 286}
+{"timesteps_total": 344400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 91537.605, "num_steps_sampled": 344400, "update_time_ms": 2.268, "num_steps_trained": 344400, "load_time_ms": 0.618, "default": {"kl": 0.015963837504386902, "cur_lr": 4.999999873689376e-05, "entropy": 11.328529357910156, "total_loss": 17.16999053955078, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.131949320435524, "vf_explained_var": 0.9855950474739075, "vf_loss": 17.285778045654297}, "grad_time_ms": 770.754}, "pid": 3934253, "time_total_s": 31159.240578889847, "episode_reward_mean": -154.0807793508338, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -184.65606171714566, "policy_reward_mean": {}, "episodes_total": 6888, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -136.9307972088323, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-15-54", "training_iteration": 287, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756502154, "episode_len_mean": 50.0, "timesteps_since_restore": 344400, "time_since_restore": 31159.240578889847, "time_this_iter_s": 95.12677335739136, "iterations_since_restore": 287}
+{"timesteps_total": 345600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93518.384, "num_steps_sampled": 345600, "update_time_ms": 2.275, "num_steps_trained": 345600, "load_time_ms": 0.618, "default": {"kl": 0.014267970807850361, "cur_lr": 4.999999873689376e-05, "entropy": 11.001362800598145, "total_loss": 29.061933517456055, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1258041262626648, "vf_explained_var": 0.9764705300331116, "vf_loss": 29.17329216003418}, "grad_time_ms": 761.355}, "pid": 3934253, "time_total_s": 31262.563413619995, "episode_reward_mean": -153.38289002657675, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -169.46693858971975, "policy_reward_mean": {}, "episodes_total": 6912, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -136.9307972088323, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-17-37", "training_iteration": 288, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756502257, "episode_len_mean": 50.0, "timesteps_since_restore": 345600, "time_since_restore": 31262.563413619995, "time_this_iter_s": 103.32283473014832, "iterations_since_restore": 288}
+{"timesteps_total": 346800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94912.713, "num_steps_sampled": 346800, "update_time_ms": 2.331, "num_steps_trained": 346800, "load_time_ms": 0.635, "default": {"kl": 0.013144236989319324, "cur_lr": 4.999999873689376e-05, "entropy": 11.226943016052246, "total_loss": 24.29330062866211, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1239776462316513, "vf_explained_var": 0.9844390153884888, "vf_loss": 24.40397071838379}, "grad_time_ms": 762.003}, "pid": 3934253, "time_total_s": 31346.472144842148, "episode_reward_mean": -153.57627731987313, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -170.88801007674104, "policy_reward_mean": {}, "episodes_total": 6936, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -144.3950308917359, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-19-01", "training_iteration": 289, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756502341, "episode_len_mean": 50.0, "timesteps_since_restore": 346800, "time_since_restore": 31346.472144842148, "time_this_iter_s": 83.90873122215271, "iterations_since_restore": 289}
+{"timesteps_total": 348000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95128.303, "num_steps_sampled": 348000, "update_time_ms": 2.287, "num_steps_trained": 348000, "load_time_ms": 0.634, "default": {"kl": 0.015516340732574463, "cur_lr": 4.999999873689376e-05, "entropy": 11.11108112335205, "total_loss": 22.668201446533203, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13011670112609863, "vf_explained_var": 0.9813645482063293, "vf_loss": 22.782609939575195}, "grad_time_ms": 768.12}, "pid": 3934253, "time_total_s": 31449.235904693604, "episode_reward_mean": -153.4808211215403, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -170.88801007674104, "policy_reward_mean": {}, "episodes_total": 6960, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -144.3950308917359, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-20-44", "training_iteration": 290, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756502444, "episode_len_mean": 50.0, "timesteps_since_restore": 348000, "time_since_restore": 31449.235904693604, "time_this_iter_s": 102.76375985145569, "iterations_since_restore": 290}
+{"timesteps_total": 349200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95007.017, "num_steps_sampled": 349200, "update_time_ms": 2.297, "num_steps_trained": 349200, "load_time_ms": 0.638, "default": {"kl": 0.013895703479647636, "cur_lr": 4.999999873689376e-05, "entropy": 11.218277931213379, "total_loss": 53.47324752807617, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12617962062358856, "vf_explained_var": 0.9618358612060547, "vf_loss": 53.585357666015625}, "grad_time_ms": 768.239}, "pid": 3934253, "time_total_s": 31549.526314735413, "episode_reward_mean": -153.55709118338893, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -185.80293929008243, "policy_reward_mean": {}, "episodes_total": 6984, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -144.3950308917359, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-22-24", "training_iteration": 291, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756502544, "episode_len_mean": 50.0, "timesteps_since_restore": 349200, "time_since_restore": 31549.526314735413, "time_this_iter_s": 100.29041004180908, "iterations_since_restore": 291}
+{"timesteps_total": 350400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95377.448, "num_steps_sampled": 350400, "update_time_ms": 2.285, "num_steps_trained": 350400, "load_time_ms": 0.635, "default": {"kl": 0.013131446205079556, "cur_lr": 4.999999873689376e-05, "entropy": 11.09090805053711, "total_loss": 13.908455848693848, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13256239891052246, "vf_explained_var": 0.9884033203125, "vf_loss": 14.027721405029297}, "grad_time_ms": 772.362}, "pid": 3934253, "time_total_s": 31644.43196439743, "episode_reward_mean": -153.58899087363505, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -185.80293929008243, "policy_reward_mean": {}, "episodes_total": 7008, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -144.3950308917359, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-23-59", "training_iteration": 292, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756502639, "episode_len_mean": 50.0, "timesteps_since_restore": 350400, "time_since_restore": 31644.43196439743, "time_this_iter_s": 94.90564966201782, "iterations_since_restore": 292}
+{"timesteps_total": 351600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95129.044, "num_steps_sampled": 351600, "update_time_ms": 2.323, "num_steps_trained": 351600, "load_time_ms": 0.626, "default": {"kl": 0.01519844401627779, "cur_lr": 4.999999873689376e-05, "entropy": 11.170174598693848, "total_loss": 22.778303146362305, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1352260261774063, "vf_explained_var": 0.9831691384315491, "vf_loss": 22.89813995361328}, "grad_time_ms": 772.593}, "pid": 3934253, "time_total_s": 31749.863520383835, "episode_reward_mean": -153.29731566182426, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -185.80293929008243, "policy_reward_mean": {}, "episodes_total": 7032, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -145.8788879310617, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-25-44", "training_iteration": 293, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756502744, "episode_len_mean": 50.0, "timesteps_since_restore": 351600, "time_since_restore": 31749.863520383835, "time_this_iter_s": 105.43155598640442, "iterations_since_restore": 293}
+{"timesteps_total": 352800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 96644.061, "num_steps_sampled": 352800, "update_time_ms": 2.419, "num_steps_trained": 352800, "load_time_ms": 0.622, "default": {"kl": 0.01330583542585373, "cur_lr": 4.999999873689376e-05, "entropy": 11.097810745239258, "total_loss": 36.368648529052734, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12001624703407288, "vf_explained_var": 0.9719719290733337, "vf_loss": 36.47519302368164}, "grad_time_ms": 769.725}, "pid": 3934253, "time_total_s": 31849.86645746231, "episode_reward_mean": -153.5049688801624, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -185.80293929008243, "policy_reward_mean": {}, "episodes_total": 7056, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.82080949651424, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-27-24", "training_iteration": 294, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756502844, "episode_len_mean": 50.0, "timesteps_since_restore": 352800, "time_since_restore": 31849.86645746231, "time_this_iter_s": 100.00293707847595, "iterations_since_restore": 294}
+{"timesteps_total": 354000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 96999.372, "num_steps_sampled": 354000, "update_time_ms": 2.436, "num_steps_trained": 354000, "load_time_ms": 0.627, "default": {"kl": 0.015100941061973572, "cur_lr": 4.999999873689376e-05, "entropy": 11.145347595214844, "total_loss": 31.30360221862793, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12340303510427475, "vf_explained_var": 0.9782091975212097, "vf_loss": 31.41171646118164}, "grad_time_ms": 767.442}, "pid": 3934253, "time_total_s": 31938.141626119614, "episode_reward_mean": -153.3523614012057, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -181.32258316814773, "policy_reward_mean": {}, "episodes_total": 7080, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.15408264827664, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-28-53", "training_iteration": 295, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756502933, "episode_len_mean": 50.0, "timesteps_since_restore": 354000, "time_since_restore": 31938.141626119614, "time_this_iter_s": 88.27516865730286, "iterations_since_restore": 295}
+{"timesteps_total": 355200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94766.538, "num_steps_sampled": 355200, "update_time_ms": 2.442, "num_steps_trained": 355200, "load_time_ms": 0.634, "default": {"kl": 0.015226011164486408, "cur_lr": 4.999999873689376e-05, "entropy": 10.920625686645508, "total_loss": 17.984262466430664, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12347279489040375, "vf_explained_var": 0.9853192567825317, "vf_loss": 18.092321395874023}, "grad_time_ms": 759.294}, "pid": 3934253, "time_total_s": 32019.453699350357, "episode_reward_mean": -153.4454585060366, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -181.32258316814773, "policy_reward_mean": {}, "episodes_total": 7104, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.15408264827664, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-30-14", "training_iteration": 296, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756503014, "episode_len_mean": 50.0, "timesteps_since_restore": 355200, "time_since_restore": 32019.453699350357, "time_this_iter_s": 81.31207323074341, "iterations_since_restore": 296}
+{"timesteps_total": 356400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92443.191, "num_steps_sampled": 356400, "update_time_ms": 2.467, "num_steps_trained": 356400, "load_time_ms": 0.664, "default": {"kl": 0.014233733527362347, "cur_lr": 4.999999873689376e-05, "entropy": 10.99919605255127, "total_loss": 12.16675853729248, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1281324028968811, "vf_explained_var": 0.99040287733078, "vf_loss": 12.280479431152344}, "grad_time_ms": 760.707}, "pid": 3934253, "time_total_s": 32091.361676692963, "episode_reward_mean": -153.30487583861384, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -181.32258316814773, "policy_reward_mean": {}, "episodes_total": 7128, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.15408264827664, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-31-26", "training_iteration": 297, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756503086, "episode_len_mean": 50.0, "timesteps_since_restore": 356400, "time_since_restore": 32091.361676692963, "time_this_iter_s": 71.90797734260559, "iterations_since_restore": 297}
+{"timesteps_total": 357600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92704.121, "num_steps_sampled": 357600, "update_time_ms": 2.481, "num_steps_trained": 357600, "load_time_ms": 0.66, "default": {"kl": 0.013451273553073406, "cur_lr": 4.999999873689376e-05, "entropy": 11.026782989501953, "total_loss": 21.95667266845703, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1233801320195198, "vf_explained_var": 0.9825711846351624, "vf_loss": 22.06643295288086}, "grad_time_ms": 762.934}, "pid": 3934253, "time_total_s": 32197.31569838524, "episode_reward_mean": -152.81307272750516, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -163.96797787962552, "policy_reward_mean": {}, "episodes_total": 7152, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.15408264827664, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-33-12", "training_iteration": 298, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756503192, "episode_len_mean": 50.0, "timesteps_since_restore": 357600, "time_since_restore": 32197.31569838524, "time_this_iter_s": 105.954021692276, "iterations_since_restore": 298}
+{"timesteps_total": 358800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94347.891, "num_steps_sampled": 358800, "update_time_ms": 2.435, "num_steps_trained": 358800, "load_time_ms": 0.649, "default": {"kl": 0.015564335510134697, "cur_lr": 4.999999873689376e-05, "entropy": 11.200122833251953, "total_loss": 27.603986740112305, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14806872606277466, "vf_explained_var": 0.9808406829833984, "vf_loss": 27.73629379272461}, "grad_time_ms": 767.521}, "pid": 3934253, "time_total_s": 32297.707879304886, "episode_reward_mean": -152.7437017894222, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.73387901983173, "policy_reward_mean": {}, "episodes_total": 7176, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.15408264827664, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-34-52", "training_iteration": 299, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756503292, "episode_len_mean": 50.0, "timesteps_since_restore": 358800, "time_since_restore": 32297.707879304886, "time_this_iter_s": 100.39218091964722, "iterations_since_restore": 299}
+{"timesteps_total": 360000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93078.785, "num_steps_sampled": 360000, "update_time_ms": 2.486, "num_steps_trained": 360000, "load_time_ms": 0.657, "default": {"kl": 0.014852085150778294, "cur_lr": 4.999999873689376e-05, "entropy": 11.155905723571777, "total_loss": 15.688905715942383, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14254923164844513, "vf_explained_var": 0.9873740673065186, "vf_loss": 15.81641674041748}, "grad_time_ms": 766.199}, "pid": 3934253, "time_total_s": 32387.767731428146, "episode_reward_mean": -153.14312093140904, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -169.91469154306978, "policy_reward_mean": {}, "episodes_total": 7200, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -140.8243464522184, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-36-22", "training_iteration": 300, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756503382, "episode_len_mean": 50.0, "timesteps_since_restore": 360000, "time_since_restore": 32387.767731428146, "time_this_iter_s": 90.0598521232605, "iterations_since_restore": 300}
+{"timesteps_total": 361200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92802.854, "num_steps_sampled": 361200, "update_time_ms": 2.522, "num_steps_trained": 361200, "load_time_ms": 0.655, "default": {"kl": 0.016245905309915543, "cur_lr": 4.999999873689376e-05, "entropy": 11.024404525756836, "total_loss": 11.871007919311523, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13858658075332642, "vf_explained_var": 0.99014812707901, "vf_loss": 11.993144989013672}, "grad_time_ms": 762.687}, "pid": 3934253, "time_total_s": 32485.263649463654, "episode_reward_mean": -153.2274074502331, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -169.91469154306978, "policy_reward_mean": {}, "episodes_total": 7224, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -140.8243464522184, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-38-00", "training_iteration": 301, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756503480, "episode_len_mean": 50.0, "timesteps_since_restore": 361200, "time_since_restore": 32485.263649463654, "time_this_iter_s": 97.4959180355072, "iterations_since_restore": 301}
+{"timesteps_total": 362400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92911.094, "num_steps_sampled": 362400, "update_time_ms": 2.569, "num_steps_trained": 362400, "load_time_ms": 0.655, "default": {"kl": 0.014216229319572449, "cur_lr": 4.999999873689376e-05, "entropy": 11.159814834594727, "total_loss": 35.544677734375, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12864679098129272, "vf_explained_var": 0.9736400246620178, "vf_loss": 35.658931732177734}, "grad_time_ms": 762.43}, "pid": 3934253, "time_total_s": 32581.249537229538, "episode_reward_mean": -153.71020029202208, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -169.91469154306978, "policy_reward_mean": {}, "episodes_total": 7248, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -149.21272310850614, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-39-36", "training_iteration": 302, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756503576, "episode_len_mean": 50.0, "timesteps_since_restore": 362400, "time_since_restore": 32581.249537229538, "time_this_iter_s": 95.9858877658844, "iterations_since_restore": 302}
+{"timesteps_total": 363600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 88828.401, "num_steps_sampled": 363600, "update_time_ms": 2.568, "num_steps_trained": 363600, "load_time_ms": 0.653, "default": {"kl": 0.015200129710137844, "cur_lr": 4.999999873689376e-05, "entropy": 11.10995101928711, "total_loss": 23.112335205078125, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1372426450252533, "vf_explained_var": 0.9830207824707031, "vf_loss": 23.23418617248535}, "grad_time_ms": 763.732}, "pid": 3934253, "time_total_s": 32645.868771076202, "episode_reward_mean": -153.62237696956078, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -169.91469154306978, "policy_reward_mean": {}, "episodes_total": 7272, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -148.23228434829258, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-40-41", "training_iteration": 303, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756503641, "episode_len_mean": 50.0, "timesteps_since_restore": 363600, "time_since_restore": 32645.868771076202, "time_this_iter_s": 64.61923384666443, "iterations_since_restore": 303}
+{"timesteps_total": 364800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 89234.035, "num_steps_sampled": 364800, "update_time_ms": 2.594, "num_steps_trained": 364800, "load_time_ms": 0.651, "default": {"kl": 0.014623595401644707, "cur_lr": 4.999999873689376e-05, "entropy": 10.935812950134277, "total_loss": 18.714929580688477, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12618975341320038, "vf_explained_var": 0.985697329044342, "vf_loss": 18.826313018798828}, "grad_time_ms": 762.85}, "pid": 3934253, "time_total_s": 32749.919049024582, "episode_reward_mean": -153.52869796702987, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.35021138292797, "policy_reward_mean": {}, "episodes_total": 7296, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -148.23228434829258, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-42-25", "training_iteration": 304, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756503745, "episode_len_mean": 50.0, "timesteps_since_restore": 364800, "time_since_restore": 32749.919049024582, "time_this_iter_s": 104.05027794837952, "iterations_since_restore": 304}
+{"timesteps_total": 366000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 87893.805, "num_steps_sampled": 366000, "update_time_ms": 2.593, "num_steps_trained": 366000, "load_time_ms": 0.643, "default": {"kl": 0.015481146052479744, "cur_lr": 4.999999873689376e-05, "entropy": 11.092779159545898, "total_loss": 23.730798721313477, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14704284071922302, "vf_explained_var": 0.9847856163978577, "vf_loss": 23.86216926574707}, "grad_time_ms": 753.279}, "pid": 3934253, "time_total_s": 32824.69520068169, "episode_reward_mean": -154.023138854144, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.08198004963523, "policy_reward_mean": {}, "episodes_total": 7320, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -147.8016334886118, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-43-39", "training_iteration": 305, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756503819, "episode_len_mean": 50.0, "timesteps_since_restore": 366000, "time_since_restore": 32824.69520068169, "time_this_iter_s": 74.77615165710449, "iterations_since_restore": 305}
+{"timesteps_total": 367200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 89572.983, "num_steps_sampled": 367200, "update_time_ms": 2.602, "num_steps_trained": 367200, "load_time_ms": 0.638, "default": {"kl": 0.013067873194813728, "cur_lr": 4.999999873689376e-05, "entropy": 10.859930992126465, "total_loss": 31.82198143005371, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14084021747112274, "vf_explained_var": 0.9786883592605591, "vf_loss": 31.949594497680664}, "grad_time_ms": 726.482}, "pid": 3934253, "time_total_s": 32922.53137564659, "episode_reward_mean": -153.78323260138052, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.08198004963523, "policy_reward_mean": {}, "episodes_total": 7344, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -147.8016334886118, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-45-17", "training_iteration": 306, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756503917, "episode_len_mean": 50.0, "timesteps_since_restore": 367200, "time_since_restore": 32922.53137564659, "time_this_iter_s": 97.83617496490479, "iterations_since_restore": 306}
+{"timesteps_total": 368400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93512.777, "num_steps_sampled": 368400, "update_time_ms": 2.596, "num_steps_trained": 368400, "load_time_ms": 0.604, "default": {"kl": 0.014852987602353096, "cur_lr": 4.999999873689376e-05, "entropy": 10.933476448059082, "total_loss": 21.214004516601562, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13704806566238403, "vf_explained_var": 0.9833498001098633, "vf_loss": 21.336013793945312}, "grad_time_ms": 711.308}, "pid": 3934253, "time_total_s": 33033.6856508255, "episode_reward_mean": -153.88048444856662, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -170.91292767388077, "policy_reward_mean": {}, "episodes_total": 7368, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -147.8016334886118, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-47-08", "training_iteration": 307, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756504028, "episode_len_mean": 50.0, "timesteps_since_restore": 368400, "time_since_restore": 33033.6856508255, "time_this_iter_s": 111.1542751789093, "iterations_since_restore": 307}
+{"timesteps_total": 369600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 91802.15, "num_steps_sampled": 369600, "update_time_ms": 2.62, "num_steps_trained": 369600, "load_time_ms": 0.612, "default": {"kl": 0.01284022256731987, "cur_lr": 4.999999873689376e-05, "entropy": 11.249340057373047, "total_loss": 48.84939193725586, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14142972230911255, "vf_explained_var": 0.9649655818939209, "vf_loss": 48.977821350097656}, "grad_time_ms": 709.303}, "pid": 3934253, "time_total_s": 33122.514219760895, "episode_reward_mean": -154.2712712317214, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -186.36841074023712, "policy_reward_mean": {}, "episodes_total": 7392, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -144.26847544598456, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-48-37", "training_iteration": 308, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756504117, "episode_len_mean": 50.0, "timesteps_since_restore": 369600, "time_since_restore": 33122.514219760895, "time_this_iter_s": 88.82856893539429, "iterations_since_restore": 308}
+{"timesteps_total": 370800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 90123.807, "num_steps_sampled": 370800, "update_time_ms": 2.665, "num_steps_trained": 370800, "load_time_ms": 0.604, "default": {"kl": 0.013471885584294796, "cur_lr": 4.999999873689376e-05, "entropy": 11.025983810424805, "total_loss": 25.35476303100586, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13464468717575073, "vf_explained_var": 0.9827299118041992, "vf_loss": 25.475767135620117}, "grad_time_ms": 703.0}, "pid": 3934253, "time_total_s": 33206.06060504913, "episode_reward_mean": -153.94511450306916, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -186.36841074023712, "policy_reward_mean": {}, "episodes_total": 7416, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.45030726659775, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-50-01", "training_iteration": 309, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756504201, "episode_len_mean": 50.0, "timesteps_since_restore": 370800, "time_since_restore": 33206.06060504913, "time_this_iter_s": 83.54638528823853, "iterations_since_restore": 309}
+{"timesteps_total": 372000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 90496.011, "num_steps_sampled": 372000, "update_time_ms": 2.641, "num_steps_trained": 372000, "load_time_ms": 0.597, "default": {"kl": 0.01515925396233797, "cur_lr": 4.999999873689376e-05, "entropy": 11.127731323242188, "total_loss": 23.858789443969727, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12599676847457886, "vf_explained_var": 0.9824094772338867, "vf_loss": 23.969438552856445}, "grad_time_ms": 712.061}, "pid": 3934253, "time_total_s": 33299.933065891266, "episode_reward_mean": -154.24905917335306, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -186.36841074023712, "policy_reward_mean": {}, "episodes_total": 7440, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.46524261832909, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-51-35", "training_iteration": 310, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756504295, "episode_len_mean": 50.0, "timesteps_since_restore": 372000, "time_since_restore": 33299.933065891266, "time_this_iter_s": 93.87246084213257, "iterations_since_restore": 310}
+{"timesteps_total": 373200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92302.612, "num_steps_sampled": 373200, "update_time_ms": 2.63, "num_steps_trained": 373200, "load_time_ms": 0.608, "default": {"kl": 0.015349972993135452, "cur_lr": 4.999999873689376e-05, "entropy": 10.800884246826172, "total_loss": 13.16865348815918, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1375599354505539, "vf_explained_var": 0.9889466762542725, "vf_loss": 13.290670394897461}, "grad_time_ms": 720.193}, "pid": 3934253, "time_total_s": 33415.57654643059, "episode_reward_mean": -153.81849049903275, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -186.36841074023712, "policy_reward_mean": {}, "episodes_total": 7464, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.46524261832909, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-53-30", "training_iteration": 311, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756504410, "episode_len_mean": 50.0, "timesteps_since_restore": 373200, "time_since_restore": 33415.57654643059, "time_this_iter_s": 115.6434805393219, "iterations_since_restore": 311}
+{"timesteps_total": 374400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92804.591, "num_steps_sampled": 374400, "update_time_ms": 2.579, "num_steps_trained": 374400, "load_time_ms": 0.603, "default": {"kl": 0.014131312258541584, "cur_lr": 4.999999873689376e-05, "entropy": 11.05422592163086, "total_loss": 23.799354553222656, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13450416922569275, "vf_explained_var": 0.9824861884117126, "vf_loss": 23.91954803466797}, "grad_time_ms": 710.768}, "pid": 3934253, "time_total_s": 33516.487151145935, "episode_reward_mean": -153.691471397228, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -174.5455242556761, "policy_reward_mean": {}, "episodes_total": 7488, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.46524261832909, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-55-11", "training_iteration": 312, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756504511, "episode_len_mean": 50.0, "timesteps_since_restore": 374400, "time_since_restore": 33516.487151145935, "time_this_iter_s": 100.91060471534729, "iterations_since_restore": 312}
+{"timesteps_total": 375600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 96374.432, "num_steps_sampled": 375600, "update_time_ms": 2.526, "num_steps_trained": 375600, "load_time_ms": 0.606, "default": {"kl": 0.014769317582249641, "cur_lr": 4.999999873689376e-05, "entropy": 10.969765663146973, "total_loss": 24.39408302307129, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1261298954486847, "vf_explained_var": 0.980952799320221, "vf_loss": 24.505258560180664}, "grad_time_ms": 700.257}, "pid": 3934253, "time_total_s": 33616.69808459282, "episode_reward_mean": -153.32532619977394, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -174.5455242556761, "policy_reward_mean": {}, "episodes_total": 7512, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -138.3540792562646, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-56-52", "training_iteration": 313, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756504612, "episode_len_mean": 50.0, "timesteps_since_restore": 375600, "time_since_restore": 33616.69808459282, "time_this_iter_s": 100.21093344688416, "iterations_since_restore": 313}
+{"timesteps_total": 376800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 96242.033, "num_steps_sampled": 376800, "update_time_ms": 2.497, "num_steps_trained": 376800, "load_time_ms": 0.606, "default": {"kl": 0.012455091811716557, "cur_lr": 4.999999873689376e-05, "entropy": 10.836710929870605, "total_loss": 39.87970733642578, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10243361443281174, "vf_explained_var": 0.9777176976203918, "vf_loss": 39.96952819824219}, "grad_time_ms": 692.382}, "pid": 3934253, "time_total_s": 33719.34510588646, "episode_reward_mean": -153.10512817751962, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -174.5455242556761, "policy_reward_mean": {}, "episodes_total": 7536, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -138.3540792562646, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-58-34", "training_iteration": 314, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756504714, "episode_len_mean": 50.0, "timesteps_since_restore": 376800, "time_since_restore": 33719.34510588646, "time_this_iter_s": 102.64702129364014, "iterations_since_restore": 314}
+{"timesteps_total": 378000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 96745.536, "num_steps_sampled": 378000, "update_time_ms": 2.53, "num_steps_trained": 378000, "load_time_ms": 0.606, "default": {"kl": 0.012768601067364216, "cur_lr": 4.999999873689376e-05, "entropy": 10.945272445678711, "total_loss": 48.44010925292969, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1169797033071518, "vf_explained_var": 0.9687525629997253, "vf_loss": 48.544151306152344}, "grad_time_ms": 704.269}, "pid": 3934253, "time_total_s": 33799.27585601807, "episode_reward_mean": -153.3753794364622, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -182.4550995827381, "policy_reward_mean": {}, "episodes_total": 7560, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -138.3540792562646, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-29_23-59-54", "training_iteration": 315, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756504794, "episode_len_mean": 50.0, "timesteps_since_restore": 378000, "time_since_restore": 33799.27585601807, "time_this_iter_s": 79.93075013160706, "iterations_since_restore": 315}
+{"timesteps_total": 379200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 97378.354, "num_steps_sampled": 379200, "update_time_ms": 2.497, "num_steps_trained": 379200, "load_time_ms": 0.603, "default": {"kl": 0.014992697164416313, "cur_lr": 4.999999873689376e-05, "entropy": 10.970458984375, "total_loss": 37.55704116821289, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13573689758777618, "vf_explained_var": 0.9713044762611389, "vf_loss": 37.67759704589844}, "grad_time_ms": 729.514}, "pid": 3934253, "time_total_s": 33903.69194102287, "episode_reward_mean": -153.12148182322898, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -182.4550995827381, "policy_reward_mean": {}, "episodes_total": 7584, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -138.3540792562646, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-01-39", "training_iteration": 316, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756504899, "episode_len_mean": 50.0, "timesteps_since_restore": 379200, "time_since_restore": 33903.69194102287, "time_this_iter_s": 104.41608500480652, "iterations_since_restore": 316}
+{"timesteps_total": 380400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95875.166, "num_steps_sampled": 380400, "update_time_ms": 2.454, "num_steps_trained": 380400, "load_time_ms": 0.605, "default": {"kl": 0.014862080104649067, "cur_lr": 4.999999873689376e-05, "entropy": 10.844161987304688, "total_loss": 21.56414222717285, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12421739101409912, "vf_explained_var": 0.9830238819122314, "vf_loss": 21.67331314086914}, "grad_time_ms": 751.727}, "pid": 3934253, "time_total_s": 34000.03533434868, "episode_reward_mean": -153.2457239279507, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -182.4550995827381, "policy_reward_mean": {}, "episodes_total": 7608, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.5929949692987, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-03-15", "training_iteration": 317, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756504995, "episode_len_mean": 50.0, "timesteps_since_restore": 380400, "time_since_restore": 34000.03533434868, "time_this_iter_s": 96.34339332580566, "iterations_since_restore": 317}
+{"timesteps_total": 381600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 97708.083, "num_steps_sampled": 381600, "update_time_ms": 2.406, "num_steps_trained": 381600, "load_time_ms": 0.629, "default": {"kl": 0.014282830990850925, "cur_lr": 4.999999873689376e-05, "entropy": 10.415968894958496, "total_loss": 22.82317352294922, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1225418746471405, "vf_explained_var": 0.9830620884895325, "vf_loss": 22.931251525878906}, "grad_time_ms": 754.055}, "pid": 3934253, "time_total_s": 34107.21705150604, "episode_reward_mean": -152.96947395657688, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -182.4550995827381, "policy_reward_mean": {}, "episodes_total": 7632, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.11070441906222, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-05-02", "training_iteration": 318, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756505102, "episode_len_mean": 50.0, "timesteps_since_restore": 381600, "time_since_restore": 34107.21705150604, "time_this_iter_s": 107.18171715736389, "iterations_since_restore": 318}
+{"timesteps_total": 382800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 100184.811, "num_steps_sampled": 382800, "update_time_ms": 2.423, "num_steps_trained": 382800, "load_time_ms": 0.63, "default": {"kl": 0.01591685228049755, "cur_lr": 4.999999873689376e-05, "entropy": 10.475652694702148, "total_loss": 11.753562927246094, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13239659368991852, "vf_explained_var": 0.9901783466339111, "vf_loss": 11.869844436645508}, "grad_time_ms": 739.596}, "pid": 3934253, "time_total_s": 34215.38590621948, "episode_reward_mean": -152.57093134687875, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -175.74868372203048, "policy_reward_mean": {}, "episodes_total": 7656, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.11070441906222, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-06-50", "training_iteration": 319, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756505210, "episode_len_mean": 50.0, "timesteps_since_restore": 382800, "time_since_restore": 34215.38590621948, "time_this_iter_s": 108.16885471343994, "iterations_since_restore": 319}
+{"timesteps_total": 384000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 101709.565, "num_steps_sampled": 384000, "update_time_ms": 2.429, "num_steps_trained": 384000, "load_time_ms": 0.636, "default": {"kl": 0.014203101396560669, "cur_lr": 4.999999873689376e-05, "entropy": 10.735393524169922, "total_loss": 23.69377326965332, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12848956882953644, "vf_explained_var": 0.9825847148895264, "vf_loss": 23.80788230895996}, "grad_time_ms": 735.134}, "pid": 3934253, "time_total_s": 34324.46160006523, "episode_reward_mean": -152.58352243026727, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -175.74868372203048, "policy_reward_mean": {}, "episodes_total": 7680, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.11070441906222, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-08-39", "training_iteration": 320, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756505319, "episode_len_mean": 50.0, "timesteps_since_restore": 384000, "time_since_restore": 34324.46160006523, "time_this_iter_s": 109.0756938457489, "iterations_since_restore": 320}
+{"timesteps_total": 385200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 99365.659, "num_steps_sampled": 385200, "update_time_ms": 2.482, "num_steps_trained": 385200, "load_time_ms": 0.618, "default": {"kl": 0.014924119226634502, "cur_lr": 4.999999873689376e-05, "entropy": 10.55162525177002, "total_loss": 19.39442253112793, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12613314390182495, "vf_explained_var": 0.9840491414070129, "vf_loss": 19.50544548034668}, "grad_time_ms": 736.471}, "pid": 3934253, "time_total_s": 34416.68057346344, "episode_reward_mean": -152.4897771954675, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -170.05123202179706, "policy_reward_mean": {}, "episodes_total": 7704, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -148.94070225783665, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-10-12", "training_iteration": 321, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756505412, "episode_len_mean": 50.0, "timesteps_since_restore": 385200, "time_since_restore": 34416.68057346344, "time_this_iter_s": 92.21897339820862, "iterations_since_restore": 321}
+{"timesteps_total": 386400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 101335.245, "num_steps_sampled": 386400, "update_time_ms": 2.512, "num_steps_trained": 386400, "load_time_ms": 0.619, "default": {"kl": 0.012489722110331059, "cur_lr": 4.999999873689376e-05, "entropy": 10.611146926879883, "total_loss": 42.83867645263672, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12331356108188629, "vf_explained_var": 0.9723660349845886, "vf_loss": 42.94934844970703}, "grad_time_ms": 738.868}, "pid": 3934253, "time_total_s": 34537.31090283394, "episode_reward_mean": -153.15623692414303, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -180.00500045552593, "policy_reward_mean": {}, "episodes_total": 7728, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -148.94070225783665, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-12-12", "training_iteration": 322, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756505532, "episode_len_mean": 50.0, "timesteps_since_restore": 386400, "time_since_restore": 34537.31090283394, "time_this_iter_s": 120.63032937049866, "iterations_since_restore": 322}
+{"timesteps_total": 387600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 102548.43, "num_steps_sampled": 387600, "update_time_ms": 2.596, "num_steps_trained": 387600, "load_time_ms": 0.616, "default": {"kl": 0.013788405805826187, "cur_lr": 4.999999873689376e-05, "entropy": 10.14149284362793, "total_loss": 22.343345642089844, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11049012094736099, "vf_explained_var": 0.9818713068962097, "vf_loss": 22.43987464904785}, "grad_time_ms": 751.021}, "pid": 3934253, "time_total_s": 34649.776156425476, "episode_reward_mean": -153.4625475023141, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -180.00500045552593, "policy_reward_mean": {}, "episodes_total": 7752, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -148.94070225783665, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-14-05", "training_iteration": 323, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756505645, "episode_len_mean": 50.0, "timesteps_since_restore": 387600, "time_since_restore": 34649.776156425476, "time_this_iter_s": 112.46525359153748, "iterations_since_restore": 323}
+{"timesteps_total": 388800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 100600.696, "num_steps_sampled": 388800, "update_time_ms": 2.617, "num_steps_trained": 388800, "load_time_ms": 0.621, "default": {"kl": 0.015624160878360271, "cur_lr": 4.999999873689376e-05, "entropy": 10.596100807189941, "total_loss": 22.377880096435547, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1312115639448166, "vf_explained_var": 0.9831936955451965, "vf_loss": 22.49327278137207}, "grad_time_ms": 759.895}, "pid": 3934253, "time_total_s": 34733.0354244709, "episode_reward_mean": -153.77975317555422, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -180.00500045552593, "policy_reward_mean": {}, "episodes_total": 7776, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -148.94070225783665, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-15-28", "training_iteration": 324, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756505728, "episode_len_mean": 50.0, "timesteps_since_restore": 388800, "time_since_restore": 34733.0354244709, "time_this_iter_s": 83.25926804542542, "iterations_since_restore": 324}
+{"timesteps_total": 390000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 103876.041, "num_steps_sampled": 390000, "update_time_ms": 2.579, "num_steps_trained": 390000, "load_time_ms": 0.625, "default": {"kl": 0.01323324628174305, "cur_lr": 4.999999873689376e-05, "entropy": 10.411630630493164, "total_loss": 44.34865188598633, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1147596687078476, "vf_explained_var": 0.9732678532600403, "vf_loss": 44.450016021728516}, "grad_time_ms": 725.227}, "pid": 3934253, "time_total_s": 34845.3717956543, "episode_reward_mean": -154.14158061826183, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -180.00500045552593, "policy_reward_mean": {}, "episodes_total": 7800, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -150.57069385002504, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-17-20", "training_iteration": 325, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756505840, "episode_len_mean": 50.0, "timesteps_since_restore": 390000, "time_since_restore": 34845.3717956543, "time_this_iter_s": 112.33637118339539, "iterations_since_restore": 325}
+{"timesteps_total": 391200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 101697.002, "num_steps_sampled": 391200, "update_time_ms": 2.57, "num_steps_trained": 391200, "load_time_ms": 0.628, "default": {"kl": 0.012857540510594845, "cur_lr": 4.999999873689376e-05, "entropy": 10.455910682678223, "total_loss": 42.997108459472656, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10881756246089935, "vf_explained_var": 0.9767987132072449, "vf_loss": 43.09290313720703}, "grad_time_ms": 731.369}, "pid": 3934253, "time_total_s": 34928.06006979942, "episode_reward_mean": -154.09453792189086, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -173.1302892079539, "policy_reward_mean": {}, "episodes_total": 7824, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -150.75378690688086, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-18-43", "training_iteration": 326, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756505923, "episode_len_mean": 50.0, "timesteps_since_restore": 391200, "time_since_restore": 34928.06006979942, "time_this_iter_s": 82.68827414512634, "iterations_since_restore": 326}
+{"timesteps_total": 392400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 99412.19, "num_steps_sampled": 392400, "update_time_ms": 2.608, "num_steps_trained": 392400, "load_time_ms": 0.632, "default": {"kl": 0.013225565664470196, "cur_lr": 4.999999873689376e-05, "entropy": 10.42746353149414, "total_loss": 30.64324951171875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13256524503231049, "vf_explained_var": 0.9776370525360107, "vf_loss": 30.76242446899414}, "grad_time_ms": 726.856}, "pid": 3934253, "time_total_s": 35001.51141524315, "episode_reward_mean": -154.43401371835216, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -180.4741776622837, "policy_reward_mean": {}, "episodes_total": 7848, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -150.64127333487605, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-19-57", "training_iteration": 327, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756505997, "episode_len_mean": 50.0, "timesteps_since_restore": 392400, "time_since_restore": 35001.51141524315, "time_this_iter_s": 73.45134544372559, "iterations_since_restore": 327}
+{"timesteps_total": 393600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 98206.757, "num_steps_sampled": 393600, "update_time_ms": 2.612, "num_steps_trained": 393600, "load_time_ms": 0.598, "default": {"kl": 0.013027322478592396, "cur_lr": 4.999999873689376e-05, "entropy": 10.334811210632324, "total_loss": 25.57097053527832, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11095554381608963, "vf_explained_var": 0.9810521006584167, "vf_loss": 25.668737411499023}, "grad_time_ms": 726.985}, "pid": 3934253, "time_total_s": 35096.638957738876, "episode_reward_mean": -154.35407762027717, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -180.4741776622837, "policy_reward_mean": {}, "episodes_total": 7872, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -150.64127333487605, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-21-32", "training_iteration": 328, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756506092, "episode_len_mean": 50.0, "timesteps_since_restore": 393600, "time_since_restore": 35096.638957738876, "time_this_iter_s": 95.12754249572754, "iterations_since_restore": 328}
+{"timesteps_total": 394800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 99099.85, "num_steps_sampled": 394800, "update_time_ms": 2.616, "num_steps_trained": 394800, "load_time_ms": 0.609, "default": {"kl": 0.015124778263270855, "cur_lr": 4.999999873689376e-05, "entropy": 10.097905158996582, "total_loss": 23.35348129272461, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1195986419916153, "vf_explained_var": 0.9812294840812683, "vf_loss": 23.457765579223633}, "grad_time_ms": 747.878}, "pid": 3934253, "time_total_s": 35213.948383808136, "episode_reward_mean": -153.77713772000587, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -180.4741776622837, "policy_reward_mean": {}, "episodes_total": 7896, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -136.8694429954124, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-23-29", "training_iteration": 329, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756506209, "episode_len_mean": 50.0, "timesteps_since_restore": 394800, "time_since_restore": 35213.948383808136, "time_this_iter_s": 117.30942606925964, "iterations_since_restore": 329}
+{"timesteps_total": 396000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 96400.676, "num_steps_sampled": 396000, "update_time_ms": 2.616, "num_steps_trained": 396000, "load_time_ms": 0.608, "default": {"kl": 0.014633645303547382, "cur_lr": 4.999999873689376e-05, "entropy": 10.538222312927246, "total_loss": 20.841421127319336, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12716291844844818, "vf_explained_var": 0.9844285249710083, "vf_loss": 20.953765869140625}, "grad_time_ms": 746.647}, "pid": 3934253, "time_total_s": 35296.019594192505, "episode_reward_mean": -153.28856495343746, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -180.4741776622837, "policy_reward_mean": {}, "episodes_total": 7920, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -136.8694429954124, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-24-51", "training_iteration": 330, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756506291, "episode_len_mean": 50.0, "timesteps_since_restore": 396000, "time_since_restore": 35296.019594192505, "time_this_iter_s": 82.0712103843689, "iterations_since_restore": 330}
+{"timesteps_total": 397200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 97662.735, "num_steps_sampled": 397200, "update_time_ms": 2.72, "num_steps_trained": 397200, "load_time_ms": 0.609, "default": {"kl": 0.014507361687719822, "cur_lr": 4.999999873689376e-05, "entropy": 10.390003204345703, "total_loss": 28.46442413330078, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1269027292728424, "vf_explained_var": 0.9785017371177673, "vf_loss": 28.57663917541504}, "grad_time_ms": 737.279}, "pid": 3934253, "time_total_s": 35400.76520228386, "episode_reward_mean": -152.84106423066166, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -180.4741776622837, "policy_reward_mean": {}, "episodes_total": 7944, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -135.7076686254385, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-26-36", "training_iteration": 331, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756506396, "episode_len_mean": 50.0, "timesteps_since_restore": 397200, "time_since_restore": 35400.76520228386, "time_this_iter_s": 104.74560809135437, "iterations_since_restore": 331}
+{"timesteps_total": 398400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94954.544, "num_steps_sampled": 398400, "update_time_ms": 2.706, "num_steps_trained": 398400, "load_time_ms": 0.621, "default": {"kl": 0.014371686615049839, "cur_lr": 4.999999873689376e-05, "entropy": 10.481554985046387, "total_loss": 26.985797882080078, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13098128139972687, "vf_explained_var": 0.9793742299079895, "vf_loss": 27.10222816467285}, "grad_time_ms": 742.071}, "pid": 3934253, "time_total_s": 35494.36348748207, "episode_reward_mean": -152.3610456543385, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.77579605740746, "policy_reward_mean": {}, "episodes_total": 7968, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -135.7076686254385, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-28-10", "training_iteration": 332, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756506490, "episode_len_mean": 50.0, "timesteps_since_restore": 398400, "time_since_restore": 35494.36348748207, "time_this_iter_s": 93.59828519821167, "iterations_since_restore": 332}
+{"timesteps_total": 399600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 91593.414, "num_steps_sampled": 399600, "update_time_ms": 2.679, "num_steps_trained": 399600, "load_time_ms": 0.625, "default": {"kl": 0.013958621770143509, "cur_lr": 4.999999873689376e-05, "entropy": 10.51937484741211, "total_loss": 40.451904296875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13136720657348633, "vf_explained_var": 0.9713349938392639, "vf_loss": 40.56913757324219}, "grad_time_ms": 744.421}, "pid": 3934253, "time_total_s": 35573.24069619179, "episode_reward_mean": -152.6889494291554, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -177.64100823331634, "policy_reward_mean": {}, "episodes_total": 7992, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -135.7076686254385, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-29-28", "training_iteration": 333, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756506568, "episode_len_mean": 50.0, "timesteps_since_restore": 399600, "time_since_restore": 35573.24069619179, "time_this_iter_s": 78.8772087097168, "iterations_since_restore": 333}
+{"timesteps_total": 400800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 91368.618, "num_steps_sampled": 400800, "update_time_ms": 2.637, "num_steps_trained": 400800, "load_time_ms": 0.628, "default": {"kl": 0.015249352902173996, "cur_lr": 4.999999873689376e-05, "entropy": 10.27700424194336, "total_loss": 21.162511825561523, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11858128011226654, "vf_explained_var": 0.9836852550506592, "vf_loss": 21.26565170288086}, "grad_time_ms": 752.607}, "pid": 3934253, "time_total_s": 35654.33391952515, "episode_reward_mean": -152.6987609356839, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -177.64100823331634, "policy_reward_mean": {}, "episodes_total": 8016, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -135.7076686254385, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-30-50", "training_iteration": 334, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756506650, "episode_len_mean": 50.0, "timesteps_since_restore": 400800, "time_since_restore": 35654.33391952515, "time_this_iter_s": 81.09322333335876, "iterations_since_restore": 334}
+{"timesteps_total": 402000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 88729.159, "num_steps_sampled": 402000, "update_time_ms": 2.675, "num_steps_trained": 402000, "load_time_ms": 0.63, "default": {"kl": 0.013706881552934647, "cur_lr": 4.999999873689376e-05, "entropy": 10.258893013000488, "total_loss": 18.555627822875977, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12699751555919647, "vf_explained_var": 0.986332356929779, "vf_loss": 18.668746948242188}, "grad_time_ms": 788.154}, "pid": 3934253, "time_total_s": 35740.63249707222, "episode_reward_mean": -152.9703099260085, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -177.64100823331634, "policy_reward_mean": {}, "episodes_total": 8040, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.11140543958143, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-32-16", "training_iteration": 335, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756506736, "episode_len_mean": 50.0, "timesteps_since_restore": 402000, "time_since_restore": 35740.63249707222, "time_this_iter_s": 86.29857754707336, "iterations_since_restore": 335}
+{"timesteps_total": 403200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 91513.753, "num_steps_sampled": 403200, "update_time_ms": 2.708, "num_steps_trained": 403200, "load_time_ms": 0.627, "default": {"kl": 0.013812141492962837, "cur_lr": 4.999999873689376e-05, "entropy": 10.123869895935059, "total_loss": 17.128286361694336, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10887904465198517, "vf_explained_var": 0.9872063398361206, "vf_loss": 17.223176956176758}, "grad_time_ms": 783.817}, "pid": 3934253, "time_total_s": 35851.12422943115, "episode_reward_mean": -153.04831488940408, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -177.64100823331634, "policy_reward_mean": {}, "episodes_total": 8064, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.11140543958143, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-34-06", "training_iteration": 336, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756506846, "episode_len_mean": 50.0, "timesteps_since_restore": 403200, "time_since_restore": 35851.12422943115, "time_this_iter_s": 110.4917323589325, "iterations_since_restore": 336}
+{"timesteps_total": 404400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92711.829, "num_steps_sampled": 404400, "update_time_ms": 2.691, "num_steps_trained": 404400, "load_time_ms": 0.634, "default": {"kl": 0.013465446420013905, "cur_lr": 4.999999873689376e-05, "entropy": 10.17501449584961, "total_loss": 22.101633071899414, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12934455275535583, "vf_explained_var": 0.9826427102088928, "vf_loss": 22.217344284057617}, "grad_time_ms": 781.355}, "pid": 3934253, "time_total_s": 35936.53139543533, "episode_reward_mean": -152.91974841361036, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.6798048261915, "policy_reward_mean": {}, "episodes_total": 8088, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -144.01814896022987, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-35-32", "training_iteration": 337, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756506932, "episode_len_mean": 50.0, "timesteps_since_restore": 404400, "time_since_restore": 35936.53139543533, "time_this_iter_s": 85.40716600418091, "iterations_since_restore": 337}
+{"timesteps_total": 405600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94088.489, "num_steps_sampled": 405600, "update_time_ms": 2.633, "num_steps_trained": 405600, "load_time_ms": 0.652, "default": {"kl": 0.01327629666775465, "cur_lr": 4.999999873689376e-05, "entropy": 10.075685501098633, "total_loss": 20.67936897277832, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.128595769405365, "vf_explained_var": 0.9839978814125061, "vf_loss": 20.79452133178711}, "grad_time_ms": 781.972}, "pid": 3934253, "time_total_s": 36045.43131017685, "episode_reward_mean": -152.76994038362417, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -171.73506361888798, "policy_reward_mean": {}, "episodes_total": 8112, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -144.01814896022987, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-37-21", "training_iteration": 338, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756507041, "episode_len_mean": 50.0, "timesteps_since_restore": 405600, "time_since_restore": 36045.43131017685, "time_this_iter_s": 108.89991474151611, "iterations_since_restore": 338}
+{"timesteps_total": 406800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 91540.836, "num_steps_sampled": 406800, "update_time_ms": 2.577, "num_steps_trained": 406800, "load_time_ms": 0.653, "default": {"kl": 0.01496865227818489, "cur_lr": 4.999999873689376e-05, "entropy": 10.436251640319824, "total_loss": 32.833805084228516, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12757453322410583, "vf_explained_var": 0.9752024412155151, "vf_loss": 32.946224212646484}, "grad_time_ms": 782.538}, "pid": 3934253, "time_total_s": 36137.26930594444, "episode_reward_mean": -152.87661447578267, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -178.246255970889, "policy_reward_mean": {}, "episodes_total": 8136, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.1453355829173, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-38-53", "training_iteration": 339, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756507133, "episode_len_mean": 50.0, "timesteps_since_restore": 406800, "time_since_restore": 36137.26930594444, "time_this_iter_s": 91.83799576759338, "iterations_since_restore": 339}
+{"timesteps_total": 408000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 91825.392, "num_steps_sampled": 408000, "update_time_ms": 2.581, "num_steps_trained": 408000, "load_time_ms": 0.647, "default": {"kl": 0.014040197245776653, "cur_lr": 4.999999873689376e-05, "entropy": 10.187368392944336, "total_loss": 21.68220329284668, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13107901811599731, "vf_explained_var": 0.9829478859901428, "vf_loss": 21.799068450927734}, "grad_time_ms": 790.481}, "pid": 3934253, "time_total_s": 36222.26623415947, "episode_reward_mean": -152.5864977452388, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -178.246255970889, "policy_reward_mean": {}, "episodes_total": 8160, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.1453355829173, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-40-18", "training_iteration": 340, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756507218, "episode_len_mean": 50.0, "timesteps_since_restore": 408000, "time_since_restore": 36222.26623415947, "time_this_iter_s": 84.99692821502686, "iterations_since_restore": 340}
+{"timesteps_total": 409200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92400.945, "num_steps_sampled": 409200, "update_time_ms": 2.409, "num_steps_trained": 409200, "load_time_ms": 0.681, "default": {"kl": 0.014228183776140213, "cur_lr": 4.999999873689376e-05, "entropy": 10.1898193359375, "total_loss": 19.298744201660156, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11869990825653076, "vf_explained_var": 0.9837184548377991, "vf_loss": 19.403038024902344}, "grad_time_ms": 792.406}, "pid": 3934253, "time_total_s": 36332.785865306854, "episode_reward_mean": -152.8219143853639, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -178.246255970889, "policy_reward_mean": {}, "episodes_total": 8184, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.1453355829173, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-42-08", "training_iteration": 341, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756507328, "episode_len_mean": 50.0, "timesteps_since_restore": 409200, "time_since_restore": 36332.785865306854, "time_this_iter_s": 110.51963114738464, "iterations_since_restore": 341}
+{"timesteps_total": 410400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 91987.235, "num_steps_sampled": 410400, "update_time_ms": 2.385, "num_steps_trained": 410400, "load_time_ms": 0.673, "default": {"kl": 0.013174712657928467, "cur_lr": 4.999999873689376e-05, "entropy": 10.154784202575684, "total_loss": 17.16404914855957, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13054805994033813, "vf_explained_var": 0.9870219826698303, "vf_loss": 17.281259536743164}, "grad_time_ms": 783.421}, "pid": 3934253, "time_total_s": 36422.15473651886, "episode_reward_mean": -152.79249832994313, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -178.246255970889, "policy_reward_mean": {}, "episodes_total": 8208, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.1453355829173, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-43-38", "training_iteration": 342, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756507418, "episode_len_mean": 50.0, "timesteps_since_restore": 410400, "time_since_restore": 36422.15473651886, "time_this_iter_s": 89.36887121200562, "iterations_since_restore": 342}
+{"timesteps_total": 411600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93259.94, "num_steps_sampled": 411600, "update_time_ms": 2.39, "num_steps_trained": 411600, "load_time_ms": 0.682, "default": {"kl": 0.013398093171417713, "cur_lr": 4.999999873689376e-05, "entropy": 10.21140193939209, "total_loss": 15.42952823638916, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12429417669773102, "vf_explained_var": 0.988605260848999, "vf_loss": 15.54025650024414}, "grad_time_ms": 788.74}, "pid": 3934253, "time_total_s": 36513.812532663345, "episode_reward_mean": -152.4947968460862, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.25618485757914, "policy_reward_mean": {}, "episodes_total": 8232, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -147.63720264870892, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-45-09", "training_iteration": 343, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756507509, "episode_len_mean": 50.0, "timesteps_since_restore": 411600, "time_since_restore": 36513.812532663345, "time_this_iter_s": 91.65779614448547, "iterations_since_restore": 343}
+{"timesteps_total": 412800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94727.256, "num_steps_sampled": 412800, "update_time_ms": 2.402, "num_steps_trained": 412800, "load_time_ms": 0.673, "default": {"kl": 0.015052050352096558, "cur_lr": 4.999999873689376e-05, "entropy": 10.440613746643066, "total_loss": 25.006467819213867, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1175057590007782, "vf_explained_var": 0.9807634353637695, "vf_loss": 25.108734130859375}, "grad_time_ms": 784.412}, "pid": 3934253, "time_total_s": 36609.53568506241, "episode_reward_mean": -153.21520828059778, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -168.82503659059702, "policy_reward_mean": {}, "episodes_total": 8256, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -147.63720264870892, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-46-45", "training_iteration": 344, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756507605, "episode_len_mean": 50.0, "timesteps_since_restore": 412800, "time_since_restore": 36609.53568506241, "time_this_iter_s": 95.72315239906311, "iterations_since_restore": 344}
+{"timesteps_total": 414000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 96656.326, "num_steps_sampled": 414000, "update_time_ms": 2.368, "num_steps_trained": 414000, "load_time_ms": 0.668, "default": {"kl": 0.015476263128221035, "cur_lr": 4.999999873689376e-05, "entropy": 10.44300651550293, "total_loss": 12.492606163024902, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12842413783073425, "vf_explained_var": 0.989512026309967, "vf_loss": 12.605360984802246}, "grad_time_ms": 781.723}, "pid": 3934253, "time_total_s": 36715.097074747086, "episode_reward_mean": -153.3844868213551, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -168.82503659059702, "policy_reward_mean": {}, "episodes_total": 8280, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -147.63720264870892, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-48-30", "training_iteration": 345, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756507710, "episode_len_mean": 50.0, "timesteps_since_restore": 414000, "time_since_restore": 36715.097074747086, "time_this_iter_s": 105.56138968467712, "iterations_since_restore": 345}
+{"timesteps_total": 415200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94261.095, "num_steps_sampled": 415200, "update_time_ms": 2.333, "num_steps_trained": 415200, "load_time_ms": 0.67, "default": {"kl": 0.013878900557756424, "cur_lr": 4.999999873689376e-05, "entropy": 10.181175231933594, "total_loss": 18.723909378051758, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1245008334517479, "vf_explained_var": 0.9861525297164917, "vf_loss": 18.83435821533203}, "grad_time_ms": 768.491}, "pid": 3934253, "time_total_s": 36801.50307202339, "episode_reward_mean": -153.39538590524927, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -168.82503659059702, "policy_reward_mean": {}, "episodes_total": 8304, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -147.63720264870892, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-49-57", "training_iteration": 346, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756507797, "episode_len_mean": 50.0, "timesteps_since_restore": 415200, "time_since_restore": 36801.50307202339, "time_this_iter_s": 86.40599727630615, "iterations_since_restore": 346}
+{"timesteps_total": 416400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95108.192, "num_steps_sampled": 416400, "update_time_ms": 2.361, "num_steps_trained": 416400, "load_time_ms": 0.662, "default": {"kl": 0.014218274503946304, "cur_lr": 4.999999873689376e-05, "entropy": 10.210870742797852, "total_loss": 18.950908660888672, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12428196519613266, "vf_explained_var": 0.9845414757728577, "vf_loss": 19.060794830322266}, "grad_time_ms": 764.217}, "pid": 3934253, "time_total_s": 36895.33891892433, "episode_reward_mean": -153.16668440198112, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -168.82503659059702, "policy_reward_mean": {}, "episodes_total": 8328, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -148.03892181301913, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-51-31", "training_iteration": 347, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756507891, "episode_len_mean": 50.0, "timesteps_since_restore": 416400, "time_since_restore": 36895.33891892433, "time_this_iter_s": 93.83584690093994, "iterations_since_restore": 347}
+{"timesteps_total": 417600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 91907.856, "num_steps_sampled": 417600, "update_time_ms": 2.428, "num_steps_trained": 417600, "load_time_ms": 0.654, "default": {"kl": 0.014065904542803764, "cur_lr": 4.999999873689376e-05, "entropy": 10.311721801757812, "total_loss": 22.437252044677734, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12689092755317688, "vf_explained_var": 0.983727216720581, "vf_loss": 22.549901962280273}, "grad_time_ms": 771.482}, "pid": 3934253, "time_total_s": 36972.30895447731, "episode_reward_mean": -152.7423944307145, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.470864728126, "policy_reward_mean": {}, "episodes_total": 8352, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -149.157812667166, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-52-48", "training_iteration": 348, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756507968, "episode_len_mean": 50.0, "timesteps_since_restore": 417600, "time_since_restore": 36972.30895447731, "time_this_iter_s": 76.97003555297852, "iterations_since_restore": 348}
+{"timesteps_total": 418800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92743.246, "num_steps_sampled": 418800, "update_time_ms": 2.443, "num_steps_trained": 418800, "load_time_ms": 0.646, "default": {"kl": 0.014622226357460022, "cur_lr": 4.999999873689376e-05, "entropy": 10.095756530761719, "total_loss": 13.963083267211914, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13011474907398224, "vf_explained_var": 0.9886112213134766, "vf_loss": 14.078393936157227}, "grad_time_ms": 761.339}, "pid": 3934253, "time_total_s": 37072.39999341965, "episode_reward_mean": -152.570437889023, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -168.57609319041728, "policy_reward_mean": {}, "episodes_total": 8376, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -150.24807205629406, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-54-28", "training_iteration": 349, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756508068, "episode_len_mean": 50.0, "timesteps_since_restore": 418800, "time_since_restore": 37072.39999341965, "time_this_iter_s": 100.09103894233704, "iterations_since_restore": 349}
+{"timesteps_total": 420000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93945.555, "num_steps_sampled": 420000, "update_time_ms": 2.414, "num_steps_trained": 420000, "load_time_ms": 0.651, "default": {"kl": 0.013052679598331451, "cur_lr": 4.999999873689376e-05, "entropy": 9.948760986328125, "total_loss": 26.701265335083008, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.111075259745121, "vf_explained_var": 0.9798588156700134, "vf_loss": 26.799123764038086}, "grad_time_ms": 760.168}, "pid": 3934253, "time_total_s": 37169.40801501274, "episode_reward_mean": -152.35406502911871, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -168.57609319041728, "policy_reward_mean": {}, "episodes_total": 8400, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.43713855171399, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-56-05", "training_iteration": 350, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756508165, "episode_len_mean": 50.0, "timesteps_since_restore": 420000, "time_since_restore": 37169.40801501274, "time_this_iter_s": 97.00802159309387, "iterations_since_restore": 350}
+{"timesteps_total": 421200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93551.288, "num_steps_sampled": 421200, "update_time_ms": 2.427, "num_steps_trained": 421200, "load_time_ms": 0.614, "default": {"kl": 0.013322807848453522, "cur_lr": 4.999999873689376e-05, "entropy": 10.31839370727539, "total_loss": 39.58547592163086, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12193938344717026, "vf_explained_var": 0.9701064229011536, "vf_loss": 39.69392395019531}, "grad_time_ms": 766.501}, "pid": 3934253, "time_total_s": 37276.0480325222, "episode_reward_mean": -152.9349523520042, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -182.25825795156348, "policy_reward_mean": {}, "episodes_total": 8424, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.43713855171399, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-57-52", "training_iteration": 351, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756508272, "episode_len_mean": 50.0, "timesteps_since_restore": 421200, "time_since_restore": 37276.0480325222, "time_this_iter_s": 106.64001750946045, "iterations_since_restore": 351}
+{"timesteps_total": 422400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 96615.82, "num_steps_sampled": 422400, "update_time_ms": 2.446, "num_steps_trained": 422400, "load_time_ms": 0.613, "default": {"kl": 0.014840834774076939, "cur_lr": 4.999999873689376e-05, "entropy": 10.174718856811523, "total_loss": 31.508209228515625, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1351870745420456, "vf_explained_var": 0.9790176749229431, "vf_loss": 31.62837028503418}, "grad_time_ms": 769.346}, "pid": 3934253, "time_total_s": 37396.09178161621, "episode_reward_mean": -153.35956760196896, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -182.25825795156348, "policy_reward_mean": {}, "episodes_total": 8448, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.43713855171399, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_00-59-52", "training_iteration": 352, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756508392, "episode_len_mean": 50.0, "timesteps_since_restore": 422400, "time_since_restore": 37396.09178161621, "time_this_iter_s": 120.0437490940094, "iterations_since_restore": 352}
+{"timesteps_total": 423600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 97717.845, "num_steps_sampled": 423600, "update_time_ms": 2.44, "num_steps_trained": 423600, "load_time_ms": 0.605, "default": {"kl": 0.014833922497928143, "cur_lr": 4.999999873689376e-05, "entropy": 9.910870552062988, "total_loss": 21.269311904907227, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1376529335975647, "vf_explained_var": 0.9843950271606445, "vf_loss": 21.391944885253906}, "grad_time_ms": 759.658}, "pid": 3934253, "time_total_s": 37498.67289829254, "episode_reward_mean": -153.08415396170028, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -182.25825795156348, "policy_reward_mean": {}, "episodes_total": 8472, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.43713855171399, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-01-34", "training_iteration": 353, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756508494, "episode_len_mean": 50.0, "timesteps_since_restore": 423600, "time_since_restore": 37498.67289829254, "time_this_iter_s": 102.58111667633057, "iterations_since_restore": 353}
+{"timesteps_total": 424800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 98199.752, "num_steps_sampled": 424800, "update_time_ms": 2.418, "num_steps_trained": 424800, "load_time_ms": 0.604, "default": {"kl": 0.01393041666597128, "cur_lr": 4.999999873689376e-05, "entropy": 10.231510162353516, "total_loss": 19.14379119873047, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1328810304403305, "vf_explained_var": 0.9846649169921875, "vf_loss": 19.262569427490234}, "grad_time_ms": 751.339}, "pid": 3934253, "time_total_s": 37599.13117814064, "episode_reward_mean": -153.2069426241487, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -182.25825795156348, "policy_reward_mean": {}, "episodes_total": 8496, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.1586138095392, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-03-15", "training_iteration": 354, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756508595, "episode_len_mean": 50.0, "timesteps_since_restore": 424800, "time_since_restore": 37599.13117814064, "time_this_iter_s": 100.45827984809875, "iterations_since_restore": 354}
+{"timesteps_total": 426000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 96474.088, "num_steps_sampled": 426000, "update_time_ms": 2.442, "num_steps_trained": 426000, "load_time_ms": 0.606, "default": {"kl": 0.013703294098377228, "cur_lr": 4.999999873689376e-05, "entropy": 10.181726455688477, "total_loss": 22.11202621459961, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12982912361621857, "vf_explained_var": 0.9826943278312683, "vf_loss": 22.227983474731445}, "grad_time_ms": 754.057}, "pid": 3934253, "time_total_s": 37687.463785886765, "episode_reward_mean": -152.87976951541432, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -178.5151443402442, "policy_reward_mean": {}, "episodes_total": 8520, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.1586138095392, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-04-43", "training_iteration": 355, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756508683, "episode_len_mean": 50.0, "timesteps_since_restore": 426000, "time_since_restore": 37687.463785886765, "time_this_iter_s": 88.33260774612427, "iterations_since_restore": 355}
+{"timesteps_total": 427200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 98364.902, "num_steps_sampled": 427200, "update_time_ms": 2.483, "num_steps_trained": 427200, "load_time_ms": 0.604, "default": {"kl": 0.014798227697610855, "cur_lr": 4.999999873689376e-05, "entropy": 10.146353721618652, "total_loss": 19.512731552124023, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11665691435337067, "vf_explained_var": 0.9837243556976318, "vf_loss": 19.614402770996094}, "grad_time_ms": 762.176}, "pid": 3934253, "time_total_s": 37792.859236478806, "episode_reward_mean": -152.4326080480917, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -168.48851998476675, "policy_reward_mean": {}, "episodes_total": 8544, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.1586138095392, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-06-28", "training_iteration": 356, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756508788, "episode_len_mean": 50.0, "timesteps_since_restore": 427200, "time_since_restore": 37792.859236478806, "time_this_iter_s": 105.39545059204102, "iterations_since_restore": 356}
+{"timesteps_total": 428400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 96947.825, "num_steps_sampled": 428400, "update_time_ms": 2.416, "num_steps_trained": 428400, "load_time_ms": 0.608, "default": {"kl": 0.014719611965119839, "cur_lr": 4.999999873689376e-05, "entropy": 10.013218879699707, "total_loss": 15.863059043884277, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13711626827716827, "vf_explained_var": 0.9880774021148682, "vf_loss": 15.985271453857422}, "grad_time_ms": 772.157}, "pid": 3934253, "time_total_s": 37872.6226978302, "episode_reward_mean": -152.81097276852893, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -168.48851998476675, "policy_reward_mean": {}, "episodes_total": 8568, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.1586138095392, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-07-48", "training_iteration": 357, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756508868, "episode_len_mean": 50.0, "timesteps_since_restore": 428400, "time_since_restore": 37872.6226978302, "time_this_iter_s": 79.76346135139465, "iterations_since_restore": 357}
+{"timesteps_total": 429600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 98484.83, "num_steps_sampled": 429600, "update_time_ms": 2.418, "num_steps_trained": 429600, "load_time_ms": 0.595, "default": {"kl": 0.013437781482934952, "cur_lr": 4.999999873689376e-05, "entropy": 10.048007011413574, "total_loss": 26.254295349121094, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1137915551662445, "vf_explained_var": 0.9829705357551575, "vf_loss": 26.354480743408203}, "grad_time_ms": 767.708}, "pid": 3934253, "time_total_s": 37964.91802740097, "episode_reward_mean": -152.98573683136482, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -168.48851998476675, "policy_reward_mean": {}, "episodes_total": 8592, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.0228323504369, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-09-20", "training_iteration": 358, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756508960, "episode_len_mean": 50.0, "timesteps_since_restore": 429600, "time_since_restore": 37964.91802740097, "time_this_iter_s": 92.29532957077026, "iterations_since_restore": 358}
+{"timesteps_total": 430800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 98218.345, "num_steps_sampled": 430800, "update_time_ms": 2.451, "num_steps_trained": 430800, "load_time_ms": 0.597, "default": {"kl": 0.013722885400056839, "cur_lr": 4.999999873689376e-05, "entropy": 9.970488548278809, "total_loss": 13.448766708374023, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1254318505525589, "vf_explained_var": 0.9887028932571411, "vf_loss": 13.56030559539795}, "grad_time_ms": 777.334}, "pid": 3934253, "time_total_s": 38062.439425468445, "episode_reward_mean": -152.59503919828575, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -163.3151418152035, "policy_reward_mean": {}, "episodes_total": 8616, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.94562985426637, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-10-58", "training_iteration": 359, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756509058, "episode_len_mean": 50.0, "timesteps_since_restore": 430800, "time_since_restore": 38062.439425468445, "time_this_iter_s": 97.52139806747437, "iterations_since_restore": 359}
+{"timesteps_total": 432000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 98022.492, "num_steps_sampled": 432000, "update_time_ms": 2.485, "num_steps_trained": 432000, "load_time_ms": 0.595, "default": {"kl": 0.01442575454711914, "cur_lr": 4.999999873689376e-05, "entropy": 10.238739013671875, "total_loss": 29.425323486328125, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13105913996696472, "vf_explained_var": 0.9780151844024658, "vf_loss": 29.541778564453125}, "grad_time_ms": 778.193}, "pid": 3934253, "time_total_s": 38157.49730968475, "episode_reward_mean": -152.59521854700185, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -163.75715808807124, "policy_reward_mean": {}, "episodes_total": 8640, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.94562985426637, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-12-33", "training_iteration": 360, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756509153, "episode_len_mean": 50.0, "timesteps_since_restore": 432000, "time_since_restore": 38157.49730968475, "time_this_iter_s": 95.0578842163086, "iterations_since_restore": 360}
+{"timesteps_total": 433200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 96048.622, "num_steps_sampled": 433200, "update_time_ms": 2.476, "num_steps_trained": 433200, "load_time_ms": 0.606, "default": {"kl": 0.01245577447116375, "cur_lr": 4.999999873689376e-05, "entropy": 9.976419448852539, "total_loss": 17.008989334106445, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12027224898338318, "vf_explained_var": 0.9869714379310608, "vf_loss": 17.11665153503418}, "grad_time_ms": 785.689}, "pid": 3934253, "time_total_s": 38244.47419548035, "episode_reward_mean": -152.5972637993256, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.38796960241405, "policy_reward_mean": {}, "episodes_total": 8664, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.94562985426637, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-14-00", "training_iteration": 361, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756509240, "episode_len_mean": 50.0, "timesteps_since_restore": 433200, "time_since_restore": 38244.47419548035, "time_this_iter_s": 86.97688579559326, "iterations_since_restore": 361}
+{"timesteps_total": 434400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94103.607, "num_steps_sampled": 434400, "update_time_ms": 2.541, "num_steps_trained": 434400, "load_time_ms": 0.606, "default": {"kl": 0.013436969369649887, "cur_lr": 4.999999873689376e-05, "entropy": 9.918680191040039, "total_loss": 33.949283599853516, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12893246114253998, "vf_explained_var": 0.9761844277381897, "vf_loss": 34.06460952758789}, "grad_time_ms": 782.457}, "pid": 3934253, "time_total_s": 38345.03595113754, "episode_reward_mean": -152.69931875687953, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -170.43808917486143, "policy_reward_mean": {}, "episodes_total": 8688, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.0843494317296, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-15-41", "training_iteration": 362, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756509341, "episode_len_mean": 50.0, "timesteps_since_restore": 434400, "time_since_restore": 38345.03595113754, "time_this_iter_s": 100.56175565719604, "iterations_since_restore": 362}
+{"timesteps_total": 435600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94747.806, "num_steps_sampled": 435600, "update_time_ms": 2.617, "num_steps_trained": 435600, "load_time_ms": 0.609, "default": {"kl": 0.015112587250769138, "cur_lr": 4.999999873689376e-05, "entropy": 10.319666862487793, "total_loss": 37.49136734008789, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1251940131187439, "vf_explained_var": 0.9763219356536865, "vf_loss": 37.601261138916016}, "grad_time_ms": 791.851}, "pid": 3934253, "time_total_s": 38454.15379524231, "episode_reward_mean": -153.22232896328572, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -175.87434224939994, "policy_reward_mean": {}, "episodes_total": 8712, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.0843494317296, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-17-30", "training_iteration": 363, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756509450, "episode_len_mean": 50.0, "timesteps_since_restore": 435600, "time_since_restore": 38454.15379524231, "time_this_iter_s": 109.11784410476685, "iterations_since_restore": 363}
+{"timesteps_total": 436800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93394.588, "num_steps_sampled": 436800, "update_time_ms": 2.647, "num_steps_trained": 436800, "load_time_ms": 0.609, "default": {"kl": 0.013049306347966194, "cur_lr": 4.999999873689376e-05, "entropy": 10.128003120422363, "total_loss": 40.55475997924805, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12103336304426193, "vf_explained_var": 0.9705398082733154, "vf_loss": 40.66258239746094}, "grad_time_ms": 798.195}, "pid": 3934253, "time_total_s": 38541.14335441589, "episode_reward_mean": -153.27688113916284, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -175.87434224939994, "policy_reward_mean": {}, "episodes_total": 8736, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.0843494317296, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-18-57", "training_iteration": 364, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756509537, "episode_len_mean": 50.0, "timesteps_since_restore": 436800, "time_since_restore": 38541.14335441589, "time_this_iter_s": 86.98955917358398, "iterations_since_restore": 364}
+{"timesteps_total": 438000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95569.423, "num_steps_sampled": 438000, "update_time_ms": 2.643, "num_steps_trained": 438000, "load_time_ms": 0.604, "default": {"kl": 0.013601518236100674, "cur_lr": 4.999999873689376e-05, "entropy": 9.885064125061035, "total_loss": 24.32900619506836, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13153356313705444, "vf_explained_var": 0.9816988706588745, "vf_loss": 24.446767807006836}, "grad_time_ms": 779.284}, "pid": 3934253, "time_total_s": 38651.03580594063, "episode_reward_mean": -153.14458819004005, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -175.87434224939994, "policy_reward_mean": {}, "episodes_total": 8760, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.0843494317296, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-20-47", "training_iteration": 365, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756509647, "episode_len_mean": 50.0, "timesteps_since_restore": 438000, "time_since_restore": 38651.03580594063, "time_this_iter_s": 109.8924515247345, "iterations_since_restore": 365}
+{"timesteps_total": 439200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94534.394, "num_steps_sampled": 439200, "update_time_ms": 2.634, "num_steps_trained": 439200, "load_time_ms": 0.62, "default": {"kl": 0.01434319093823433, "cur_lr": 4.999999873689376e-05, "entropy": 10.187789916992188, "total_loss": 27.139606475830078, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1373453140258789, "vf_explained_var": 0.9785805940628052, "vf_loss": 27.26243019104004}, "grad_time_ms": 784.596}, "pid": 3934253, "time_total_s": 38746.134162187576, "episode_reward_mean": -153.03023594593262, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -175.87434224939994, "policy_reward_mean": {}, "episodes_total": 8784, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -145.97616584542013, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-22-22", "training_iteration": 366, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756509742, "episode_len_mean": 50.0, "timesteps_since_restore": 439200, "time_since_restore": 38746.134162187576, "time_this_iter_s": 95.09835624694824, "iterations_since_restore": 366}
+{"timesteps_total": 440400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95815.865, "num_steps_sampled": 440400, "update_time_ms": 2.681, "num_steps_trained": 440400, "load_time_ms": 0.614, "default": {"kl": 0.01304242480546236, "cur_lr": 4.999999873689376e-05, "entropy": 9.783220291137695, "total_loss": 21.39423179626465, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1273837685585022, "vf_explained_var": 0.9835090637207031, "vf_loss": 21.50840950012207}, "grad_time_ms": 777.45}, "pid": 3934253, "time_total_s": 38838.64204645157, "episode_reward_mean": -152.98505145091403, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -175.1536698558524, "policy_reward_mean": {}, "episodes_total": 8808, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -147.98162832608875, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-23-54", "training_iteration": 367, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756509834, "episode_len_mean": 50.0, "timesteps_since_restore": 440400, "time_since_restore": 38838.64204645157, "time_this_iter_s": 92.50788426399231, "iterations_since_restore": 367}
+{"timesteps_total": 441600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95877.34, "num_steps_sampled": 441600, "update_time_ms": 2.732, "num_steps_trained": 441600, "load_time_ms": 0.625, "default": {"kl": 0.014993922784924507, "cur_lr": 4.999999873689376e-05, "entropy": 10.211225509643555, "total_loss": 30.539302825927734, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13054004311561584, "vf_explained_var": 0.9768690466880798, "vf_loss": 30.654659271240234}, "grad_time_ms": 774.471}, "pid": 3934253, "time_total_s": 38931.52576327324, "episode_reward_mean": -153.22040865837252, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -175.1536698558524, "policy_reward_mean": {}, "episodes_total": 8832, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -147.98162832608875, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-25-27", "training_iteration": 368, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756509927, "episode_len_mean": 50.0, "timesteps_since_restore": 441600, "time_since_restore": 38931.52576327324, "time_this_iter_s": 92.88371682167053, "iterations_since_restore": 368}
+{"timesteps_total": 442800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95571.739, "num_steps_sampled": 442800, "update_time_ms": 2.779, "num_steps_trained": 442800, "load_time_ms": 0.63, "default": {"kl": 0.014243930578231812, "cur_lr": 4.999999873689376e-05, "entropy": 9.84453010559082, "total_loss": 14.643656730651855, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12754985690116882, "vf_explained_var": 0.9877651929855347, "vf_loss": 14.75678539276123}, "grad_time_ms": 774.089}, "pid": 3934253, "time_total_s": 39025.988913059235, "episode_reward_mean": -153.35602109097817, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -175.1536698558524, "policy_reward_mean": {}, "episodes_total": 8856, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -147.98162832608875, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-27-02", "training_iteration": 369, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756510022, "episode_len_mean": 50.0, "timesteps_since_restore": 442800, "time_since_restore": 39025.988913059235, "time_this_iter_s": 94.46314978599548, "iterations_since_restore": 369}
+{"timesteps_total": 444000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93135.38, "num_steps_sampled": 444000, "update_time_ms": 2.732, "num_steps_trained": 444000, "load_time_ms": 0.629, "default": {"kl": 0.014785230159759521, "cur_lr": 4.999999873689376e-05, "entropy": 9.96976089477539, "total_loss": 14.757744789123535, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11156058311462402, "vf_explained_var": 0.9873138070106506, "vf_loss": 14.854334831237793}, "grad_time_ms": 773.513}, "pid": 3934253, "time_total_s": 39096.677599191666, "episode_reward_mean": -153.33171487671436, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -175.1536698558524, "policy_reward_mean": {}, "episodes_total": 8880, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -149.2437295888303, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-28-12", "training_iteration": 370, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756510092, "episode_len_mean": 50.0, "timesteps_since_restore": 444000, "time_since_restore": 39096.677599191666, "time_this_iter_s": 70.68868613243103, "iterations_since_restore": 370}
+{"timesteps_total": 445200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94533.882, "num_steps_sampled": 445200, "update_time_ms": 2.737, "num_steps_trained": 445200, "load_time_ms": 0.635, "default": {"kl": 0.014057965949177742, "cur_lr": 4.999999873689376e-05, "entropy": 10.287542343139648, "total_loss": 23.995384216308594, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12851697206497192, "vf_explained_var": 0.9828624725341797, "vf_loss": 24.10966682434082}, "grad_time_ms": 766.421}, "pid": 3934253, "time_total_s": 39197.56882786751, "episode_reward_mean": -153.44320350684313, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -171.5362803146453, "policy_reward_mean": {}, "episodes_total": 8904, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.9455142032621, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-29-53", "training_iteration": 371, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756510193, "episode_len_mean": 50.0, "timesteps_since_restore": 445200, "time_since_restore": 39197.56882786751, "time_this_iter_s": 100.89122867584229, "iterations_since_restore": 371}
+{"timesteps_total": 446400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93615.93, "num_steps_sampled": 446400, "update_time_ms": 2.668, "num_steps_trained": 446400, "load_time_ms": 0.63, "default": {"kl": 0.01378762349486351, "cur_lr": 4.999999873689376e-05, "entropy": 9.977514266967773, "total_loss": 16.470462799072266, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12618333101272583, "vf_explained_var": 0.9869677424430847, "vf_loss": 16.582687377929688}, "grad_time_ms": 770.079}, "pid": 3934253, "time_total_s": 39288.986879348755, "episode_reward_mean": -153.08341630954703, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -171.5362803146453, "policy_reward_mean": {}, "episodes_total": 8928, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.9455142032621, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-31-25", "training_iteration": 372, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756510285, "episode_len_mean": 50.0, "timesteps_since_restore": 446400, "time_since_restore": 39288.986879348755, "time_this_iter_s": 91.41805148124695, "iterations_since_restore": 372}
+{"timesteps_total": 447600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94375.551, "num_steps_sampled": 447600, "update_time_ms": 2.64, "num_steps_trained": 447600, "load_time_ms": 0.662, "default": {"kl": 0.013898391276597977, "cur_lr": 4.999999873689376e-05, "entropy": 10.216779708862305, "total_loss": 48.11854934692383, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12654566764831543, "vf_explained_var": 0.9678885340690613, "vf_loss": 48.23102569580078}, "grad_time_ms": 763.214}, "pid": 3934253, "time_total_s": 39405.63260102272, "episode_reward_mean": -153.18250980534327, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -171.5362803146453, "policy_reward_mean": {}, "episodes_total": 8952, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.9455142032621, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-33-21", "training_iteration": 373, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756510401, "episode_len_mean": 50.0, "timesteps_since_restore": 447600, "time_since_restore": 39405.63260102272, "time_this_iter_s": 116.64572167396545, "iterations_since_restore": 373}
+{"timesteps_total": 448800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94646.345, "num_steps_sampled": 448800, "update_time_ms": 2.658, "num_steps_trained": 448800, "load_time_ms": 0.673, "default": {"kl": 0.01273138914257288, "cur_lr": 4.999999873689376e-05, "entropy": 9.943889617919922, "total_loss": 28.784555435180664, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1258401721715927, "vf_explained_var": 0.977308988571167, "vf_loss": 28.897504806518555}, "grad_time_ms": 767.596}, "pid": 3934253, "time_total_s": 39495.37490296364, "episode_reward_mean": -153.0939093284892, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -171.5362803146453, "policy_reward_mean": {}, "episodes_total": 8976, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.9277414104081, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-34-51", "training_iteration": 374, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756510491, "episode_len_mean": 50.0, "timesteps_since_restore": 448800, "time_since_restore": 39495.37490296364, "time_this_iter_s": 89.74230194091797, "iterations_since_restore": 374}
+{"timesteps_total": 450000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92320.277, "num_steps_sampled": 450000, "update_time_ms": 2.638, "num_steps_trained": 450000, "load_time_ms": 0.67, "default": {"kl": 0.012571917846798897, "cur_lr": 4.999999873689376e-05, "entropy": 9.955538749694824, "total_loss": 23.156606674194336, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1303580403327942, "vf_explained_var": 0.9838725328445435, "vf_loss": 23.274234771728516}, "grad_time_ms": 780.454}, "pid": 3934253, "time_total_s": 39582.134382009506, "episode_reward_mean": -153.18728333636233, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -170.6081921394304, "policy_reward_mean": {}, "episodes_total": 9000, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.9277414104081, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-36-18", "training_iteration": 375, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756510578, "episode_len_mean": 50.0, "timesteps_since_restore": 450000, "time_since_restore": 39582.134382009506, "time_this_iter_s": 86.75947904586792, "iterations_since_restore": 375}
+{"timesteps_total": 451200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92309.316, "num_steps_sampled": 451200, "update_time_ms": 2.649, "num_steps_trained": 451200, "load_time_ms": 0.65, "default": {"kl": 0.014042828232049942, "cur_lr": 4.999999873689376e-05, "entropy": 9.61319351196289, "total_loss": 29.14134979248047, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12289823591709137, "vf_explained_var": 0.9794071912765503, "vf_loss": 29.25002670288086}, "grad_time_ms": 779.417}, "pid": 3934253, "time_total_s": 39677.111968278885, "episode_reward_mean": -153.18059563870236, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -174.89906397580594, "policy_reward_mean": {}, "episodes_total": 9024, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.9277414104081, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-37-53", "training_iteration": 376, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756510673, "episode_len_mean": 50.0, "timesteps_since_restore": 451200, "time_since_restore": 39677.111968278885, "time_this_iter_s": 94.97758626937866, "iterations_since_restore": 376}
+{"timesteps_total": 452400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92738.748, "num_steps_sampled": 452400, "update_time_ms": 2.664, "num_steps_trained": 452400, "load_time_ms": 0.652, "default": {"kl": 0.012846022844314575, "cur_lr": 4.999999873689376e-05, "entropy": 9.973522186279297, "total_loss": 34.768245697021484, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1277947723865509, "vf_explained_var": 0.9744422435760498, "vf_loss": 34.883033752441406}, "grad_time_ms": 791.139}, "pid": 3934253, "time_total_s": 39774.030656814575, "episode_reward_mean": -153.291892610524, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -174.89906397580594, "policy_reward_mean": {}, "episodes_total": 9048, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -136.53761693354755, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-39-30", "training_iteration": 377, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756510770, "episode_len_mean": 50.0, "timesteps_since_restore": 452400, "time_since_restore": 39774.030656814575, "time_this_iter_s": 96.91868853569031, "iterations_since_restore": 377}
+{"timesteps_total": 453600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 91769.429, "num_steps_sampled": 453600, "update_time_ms": 2.585, "num_steps_trained": 453600, "load_time_ms": 0.646, "default": {"kl": 0.015167261473834515, "cur_lr": 4.999999873689376e-05, "entropy": 10.005805969238281, "total_loss": 22.82137680053711, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13752031326293945, "vf_explained_var": 0.9811097383499146, "vf_loss": 22.943540573120117}, "grad_time_ms": 801.995}, "pid": 3934253, "time_total_s": 39857.32714128494, "episode_reward_mean": -153.35642537200582, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -174.89906397580594, "policy_reward_mean": {}, "episodes_total": 9072, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -136.53761693354755, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-40-53", "training_iteration": 378, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756510853, "episode_len_mean": 50.0, "timesteps_since_restore": 453600, "time_since_restore": 39857.32714128494, "time_this_iter_s": 83.29648447036743, "iterations_since_restore": 378}
+{"timesteps_total": 454800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93552.532, "num_steps_sampled": 454800, "update_time_ms": 2.553, "num_steps_trained": 454800, "load_time_ms": 0.644, "default": {"kl": 0.01317631546407938, "cur_lr": 4.999999873689376e-05, "entropy": 9.828235626220703, "total_loss": 17.909996032714844, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12949572503566742, "vf_explained_var": 0.9854044318199158, "vf_loss": 18.02614974975586}, "grad_time_ms": 788.433}, "pid": 3934253, "time_total_s": 39969.48629593849, "episode_reward_mean": -153.18384773613363, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -174.89906397580594, "policy_reward_mean": {}, "episodes_total": 9096, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -136.53761693354755, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-42-45", "training_iteration": 379, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756510965, "episode_len_mean": 50.0, "timesteps_since_restore": 454800, "time_since_restore": 39969.48629593849, "time_this_iter_s": 112.1591546535492, "iterations_since_restore": 379}
+{"timesteps_total": 456000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93344.313, "num_steps_sampled": 456000, "update_time_ms": 2.541, "num_steps_trained": 456000, "load_time_ms": 0.643, "default": {"kl": 0.014248888939619064, "cur_lr": 4.999999873689376e-05, "entropy": 9.349405288696289, "total_loss": 17.249818801879883, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1263115406036377, "vf_explained_var": 0.9867851734161377, "vf_loss": 17.36170196533203}, "grad_time_ms": 772.99}, "pid": 3934253, "time_total_s": 40037.937469005585, "episode_reward_mean": -153.1766155089574, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -174.89906397580594, "policy_reward_mean": {}, "episodes_total": 9120, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -136.53761693354755, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-43-54", "training_iteration": 380, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756511034, "episode_len_mean": 50.0, "timesteps_since_restore": 456000, "time_since_restore": 40037.937469005585, "time_this_iter_s": 68.4511730670929, "iterations_since_restore": 380}
+{"timesteps_total": 457200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94568.446, "num_steps_sampled": 457200, "update_time_ms": 2.599, "num_steps_trained": 457200, "load_time_ms": 0.632, "default": {"kl": 0.014296084642410278, "cur_lr": 4.999999873689376e-05, "entropy": 10.027332305908203, "total_loss": 19.0135555267334, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12623052299022675, "vf_explained_var": 0.9851264953613281, "vf_loss": 19.125308990478516}, "grad_time_ms": 777.1}, "pid": 3934253, "time_total_s": 40151.110609054565, "episode_reward_mean": -152.8896881821496, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -168.73716899846337, "policy_reward_mean": {}, "episodes_total": 9144, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -147.7964379594772, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-45-47", "training_iteration": 381, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756511147, "episode_len_mean": 50.0, "timesteps_since_restore": 457200, "time_since_restore": 40151.110609054565, "time_this_iter_s": 113.17314004898071, "iterations_since_restore": 381}
+{"timesteps_total": 458400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92486.574, "num_steps_sampled": 458400, "update_time_ms": 2.597, "num_steps_trained": 458400, "load_time_ms": 0.638, "default": {"kl": 0.014583314768970013, "cur_lr": 4.999999873689376e-05, "entropy": 9.759105682373047, "total_loss": 17.389978408813477, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1328737437725067, "vf_explained_var": 0.9858565926551819, "vf_loss": 17.508085250854492}, "grad_time_ms": 785.307}, "pid": 3934253, "time_total_s": 40221.79202866554, "episode_reward_mean": -152.79855423647666, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.1867128581947, "policy_reward_mean": {}, "episodes_total": 9168, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -136.55946156197663, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-46-58", "training_iteration": 382, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756511218, "episode_len_mean": 50.0, "timesteps_since_restore": 458400, "time_since_restore": 40221.79202866554, "time_this_iter_s": 70.68141961097717, "iterations_since_restore": 382}
+{"timesteps_total": 459600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92935.526, "num_steps_sampled": 459600, "update_time_ms": 2.556, "num_steps_trained": 459600, "load_time_ms": 0.603, "default": {"kl": 0.013046178966760635, "cur_lr": 4.999999873689376e-05, "entropy": 9.58828067779541, "total_loss": 29.252241134643555, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13703730702400208, "vf_explained_var": 0.9791484475135803, "vf_loss": 29.376068115234375}, "grad_time_ms": 771.747}, "pid": 3934253, "time_total_s": 40342.79056477547, "episode_reward_mean": -153.13422255735932, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -174.47439067250346, "policy_reward_mean": {}, "episodes_total": 9192, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -136.55946156197663, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-48-59", "training_iteration": 383, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756511339, "episode_len_mean": 50.0, "timesteps_since_restore": 459600, "time_since_restore": 40342.79056477547, "time_this_iter_s": 120.99853610992432, "iterations_since_restore": 383}
+{"timesteps_total": 460800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93259.448, "num_steps_sampled": 460800, "update_time_ms": 2.518, "num_steps_trained": 460800, "load_time_ms": 0.601, "default": {"kl": 0.014214631170034409, "cur_lr": 4.999999873689376e-05, "entropy": 9.635729789733887, "total_loss": 14.057104110717773, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1296011358499527, "vf_explained_var": 0.9884568452835083, "vf_loss": 14.172313690185547}, "grad_time_ms": 773.144}, "pid": 3934253, "time_total_s": 40435.78492999077, "episode_reward_mean": -153.0059882991506, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -174.47439067250346, "policy_reward_mean": {}, "episodes_total": 9216, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -136.55946156197663, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-50-32", "training_iteration": 384, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756511432, "episode_len_mean": 50.0, "timesteps_since_restore": 460800, "time_since_restore": 40435.78492999077, "time_this_iter_s": 92.99436521530151, "iterations_since_restore": 384}
+{"timesteps_total": 462000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94043.3, "num_steps_sampled": 462000, "update_time_ms": 2.555, "num_steps_trained": 462000, "load_time_ms": 0.603, "default": {"kl": 0.01449158787727356, "cur_lr": 4.999999873689376e-05, "entropy": 9.566226959228516, "total_loss": 22.213275909423828, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12301838397979736, "vf_explained_var": 0.9816312789916992, "vf_loss": 22.321619033813477}, "grad_time_ms": 765.474}, "pid": 3934253, "time_total_s": 40530.30609059334, "episode_reward_mean": -152.85925076260227, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -174.47439067250346, "policy_reward_mean": {}, "episodes_total": 9240, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -136.55946156197663, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-52-06", "training_iteration": 385, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756511526, "episode_len_mean": 50.0, "timesteps_since_restore": 462000, "time_since_restore": 40530.30609059334, "time_this_iter_s": 94.52116060256958, "iterations_since_restore": 385}
+{"timesteps_total": 463200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95899.053, "num_steps_sampled": 463200, "update_time_ms": 2.533, "num_steps_trained": 463200, "load_time_ms": 0.614, "default": {"kl": 0.013922227546572685, "cur_lr": 4.999999873689376e-05, "entropy": 9.574094772338867, "total_loss": 23.11071014404297, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.115452341735363, "vf_explained_var": 0.9839463829994202, "vf_loss": 23.212068557739258}, "grad_time_ms": 767.397}, "pid": 3934253, "time_total_s": 40643.86023974419, "episode_reward_mean": -153.37401042518425, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -174.47439067250346, "policy_reward_mean": {}, "episodes_total": 9264, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -136.55946156197663, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-54-00", "training_iteration": 386, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756511640, "episode_len_mean": 50.0, "timesteps_since_restore": 463200, "time_since_restore": 40643.86023974419, "time_this_iter_s": 113.55414915084839, "iterations_since_restore": 386}
+{"timesteps_total": 464400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94600.447, "num_steps_sampled": 464400, "update_time_ms": 2.467, "num_steps_trained": 464400, "load_time_ms": 0.617, "default": {"kl": 0.013455020263791084, "cur_lr": 4.999999873689376e-05, "entropy": 9.93942928314209, "total_loss": 71.57559204101562, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1256643831729889, "vf_explained_var": 0.9553078413009644, "vf_loss": 71.68763732910156}, "grad_time_ms": 760.01}, "pid": 3934253, "time_total_s": 40727.71838593483, "episode_reward_mean": -153.81044741787505, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -185.61971742619494, "policy_reward_mean": {}, "episodes_total": 9288, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.7858068921068, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-55-24", "training_iteration": 387, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756511724, "episode_len_mean": 50.0, "timesteps_since_restore": 464400, "time_since_restore": 40727.71838593483, "time_this_iter_s": 83.85814619064331, "iterations_since_restore": 387}
+{"timesteps_total": 465600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95143.577, "num_steps_sampled": 465600, "update_time_ms": 2.487, "num_steps_trained": 465600, "load_time_ms": 0.628, "default": {"kl": 0.014179746620357037, "cur_lr": 4.999999873689376e-05, "entropy": 9.57951545715332, "total_loss": 22.397836685180664, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12766240537166595, "vf_explained_var": 0.9822462797164917, "vf_loss": 22.51114273071289}, "grad_time_ms": 761.875}, "pid": 3934253, "time_total_s": 40816.4643805027, "episode_reward_mean": -153.7662331758303, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -185.61971742619494, "policy_reward_mean": {}, "episodes_total": 9312, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.64657409231407, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-56-53", "training_iteration": 388, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756511813, "episode_len_mean": 50.0, "timesteps_since_restore": 465600, "time_since_restore": 40816.4643805027, "time_this_iter_s": 88.7459945678711, "iterations_since_restore": 388}
+{"timesteps_total": 466800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94702.824, "num_steps_sampled": 466800, "update_time_ms": 2.473, "num_steps_trained": 466800, "load_time_ms": 0.624, "default": {"kl": 0.013959686271846294, "cur_lr": 4.999999873689376e-05, "entropy": 9.685425758361816, "total_loss": 20.270750045776367, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13417869806289673, "vf_explained_var": 0.98442143201828, "vf_loss": 20.390796661376953}, "grad_time_ms": 760.271}, "pid": 3934253, "time_total_s": 40924.1979534626, "episode_reward_mean": -153.9711238325928, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -185.61971742619494, "policy_reward_mean": {}, "episodes_total": 9336, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.64657409231407, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_01-58-40", "training_iteration": 389, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756511920, "episode_len_mean": 50.0, "timesteps_since_restore": 466800, "time_since_restore": 40924.1979534626, "time_this_iter_s": 107.7335729598999, "iterations_since_restore": 389}
+{"timesteps_total": 468000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 97748.726, "num_steps_sampled": 468000, "update_time_ms": 2.538, "num_steps_trained": 468000, "load_time_ms": 0.629, "default": {"kl": 0.0150027209892869, "cur_lr": 4.999999873689376e-05, "entropy": 9.611435890197754, "total_loss": 16.69760513305664, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1160043478012085, "vf_explained_var": 0.9859545230865479, "vf_loss": 16.798418045043945}, "grad_time_ms": 776.435}, "pid": 3934253, "time_total_s": 41023.27092075348, "episode_reward_mean": -153.2059437076237, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -185.61971742619494, "policy_reward_mean": {}, "episodes_total": 9360, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.64657409231407, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-00-19", "training_iteration": 390, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756512019, "episode_len_mean": 50.0, "timesteps_since_restore": 468000, "time_since_restore": 41023.27092075348, "time_this_iter_s": 99.0729672908783, "iterations_since_restore": 390}
+{"timesteps_total": 469200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 97941.064, "num_steps_sampled": 469200, "update_time_ms": 2.545, "num_steps_trained": 469200, "load_time_ms": 0.625, "default": {"kl": 0.01452625822275877, "cur_lr": 4.999999873689376e-05, "entropy": 9.65519905090332, "total_loss": 22.100902557373047, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12013532221317291, "vf_explained_var": 0.982020378112793, "vf_loss": 22.206329345703125}, "grad_time_ms": 765.075}, "pid": 3934253, "time_total_s": 41138.254877090454, "episode_reward_mean": -152.90577764465885, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -170.25628936587407, "policy_reward_mean": {}, "episodes_total": 9384, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.64657409231407, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-02-14", "training_iteration": 391, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756512134, "episode_len_mean": 50.0, "timesteps_since_restore": 469200, "time_since_restore": 41138.254877090454, "time_this_iter_s": 114.9839563369751, "iterations_since_restore": 391}
+{"timesteps_total": 470400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 100327.709, "num_steps_sampled": 470400, "update_time_ms": 2.58, "num_steps_trained": 470400, "load_time_ms": 0.627, "default": {"kl": 0.01367896981537342, "cur_lr": 4.999999873689376e-05, "entropy": 9.717622756958008, "total_loss": 20.730247497558594, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12303749471902847, "vf_explained_var": 0.9840138554573059, "vf_loss": 20.839435577392578}, "grad_time_ms": 740.551}, "pid": 3934253, "time_total_s": 41232.55836844444, "episode_reward_mean": -152.76562004405554, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -169.46345236421746, "policy_reward_mean": {}, "episodes_total": 9408, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -146.8892861391005, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-03-49", "training_iteration": 392, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756512229, "episode_len_mean": 50.0, "timesteps_since_restore": 470400, "time_since_restore": 41232.55836844444, "time_this_iter_s": 94.30349135398865, "iterations_since_restore": 392}
+{"timesteps_total": 471600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 98909.243, "num_steps_sampled": 471600, "update_time_ms": 2.595, "num_steps_trained": 471600, "load_time_ms": 0.63, "default": {"kl": 0.01300249807536602, "cur_lr": 4.999999873689376e-05, "entropy": 9.604305267333984, "total_loss": 19.531492233276367, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11763381958007812, "vf_explained_var": 0.9843325018882751, "vf_loss": 19.635961532592773}, "grad_time_ms": 758.83}, "pid": 3934253, "time_total_s": 41339.5549621582, "episode_reward_mean": -152.6976787690023, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -169.46345236421746, "policy_reward_mean": {}, "episodes_total": 9432, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.83068117605868, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-05-36", "training_iteration": 393, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756512336, "episode_len_mean": 50.0, "timesteps_since_restore": 471600, "time_since_restore": 41339.5549621582, "time_this_iter_s": 106.99659371376038, "iterations_since_restore": 393}
+{"timesteps_total": 472800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 98506.519, "num_steps_sampled": 472800, "update_time_ms": 2.567, "num_steps_trained": 472800, "load_time_ms": 0.624, "default": {"kl": 0.013971512205898762, "cur_lr": 4.999999873689376e-05, "entropy": 9.62321949005127, "total_loss": 13.953452110290527, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12165407091379166, "vf_explained_var": 0.9881808757781982, "vf_loss": 14.06096076965332}, "grad_time_ms": 755.993}, "pid": 3934253, "time_total_s": 41428.493270635605, "episode_reward_mean": -152.73886156700593, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -169.46345236421746, "policy_reward_mean": {}, "episodes_total": 9456, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.81457270615553, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-07-05", "training_iteration": 394, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756512425, "episode_len_mean": 50.0, "timesteps_since_restore": 472800, "time_since_restore": 41428.493270635605, "time_this_iter_s": 88.93830847740173, "iterations_since_restore": 394}
+{"timesteps_total": 474000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 97189.632, "num_steps_sampled": 474000, "update_time_ms": 2.492, "num_steps_trained": 474000, "load_time_ms": 0.628, "default": {"kl": 0.012882490642368793, "cur_lr": 4.999999873689376e-05, "entropy": 9.573514938354492, "total_loss": 30.2314453125, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11516463756561279, "vf_explained_var": 0.978480875492096, "vf_loss": 30.333566665649414}, "grad_time_ms": 755.275}, "pid": 3934253, "time_total_s": 41509.83872747421, "episode_reward_mean": -152.55249194049549, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.33702468179493, "policy_reward_mean": {}, "episodes_total": 9480, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.81457270615553, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-08-26", "training_iteration": 395, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756512506, "episode_len_mean": 50.0, "timesteps_since_restore": 474000, "time_since_restore": 41509.83872747421, "time_this_iter_s": 81.34545683860779, "iterations_since_restore": 395}
+{"timesteps_total": 475200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 97357.778, "num_steps_sampled": 475200, "update_time_ms": 2.494, "num_steps_trained": 475200, "load_time_ms": 0.626, "default": {"kl": 0.015613549388945103, "cur_lr": 4.999999873689376e-05, "entropy": 9.676960945129395, "total_loss": 20.19458770751953, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12485632300376892, "vf_explained_var": 0.9847643375396729, "vf_loss": 20.303634643554688}, "grad_time_ms": 730.936}, "pid": 3934253, "time_total_s": 41624.83124899864, "episode_reward_mean": -152.67328401582608, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.33702468179493, "policy_reward_mean": {}, "episodes_total": 9504, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.81457270615553, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-10-21", "training_iteration": 396, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756512621, "episode_len_mean": 50.0, "timesteps_since_restore": 475200, "time_since_restore": 41624.83124899864, "time_this_iter_s": 114.99252152442932, "iterations_since_restore": 396}
+{"timesteps_total": 476400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 100373.912, "num_steps_sampled": 476400, "update_time_ms": 2.536, "num_steps_trained": 476400, "load_time_ms": 0.621, "default": {"kl": 0.014947790652513504, "cur_lr": 4.999999873689376e-05, "entropy": 9.738167762756348, "total_loss": 26.864194869995117, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14435940980911255, "vf_explained_var": 0.9804407954216003, "vf_loss": 26.99342155456543}, "grad_time_ms": 726.922}, "pid": 3934253, "time_total_s": 41738.81172847748, "episode_reward_mean": -152.73994919692365, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.33702468179493, "policy_reward_mean": {}, "episodes_total": 9528, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.81457270615553, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-12-15", "training_iteration": 397, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756512735, "episode_len_mean": 50.0, "timesteps_since_restore": 476400, "time_since_restore": 41738.81172847748, "time_this_iter_s": 113.98047947883606, "iterations_since_restore": 397}
+{"timesteps_total": 477600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 99110.223, "num_steps_sampled": 477600, "update_time_ms": 2.541, "num_steps_trained": 477600, "load_time_ms": 0.621, "default": {"kl": 0.014783354476094246, "cur_lr": 4.999999873689376e-05, "entropy": 9.937175750732422, "total_loss": 36.52134323120117, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.125640869140625, "vf_explained_var": 0.9722763895988464, "vf_loss": 36.632015228271484}, "grad_time_ms": 725.375}, "pid": 3934253, "time_total_s": 41814.905596494675, "episode_reward_mean": -153.10580437943494, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -178.8284532302404, "policy_reward_mean": {}, "episodes_total": 9552, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -148.82691292199615, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-13-31", "training_iteration": 398, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756512811, "episode_len_mean": 50.0, "timesteps_since_restore": 477600, "time_since_restore": 41814.905596494675, "time_this_iter_s": 76.09386801719666, "iterations_since_restore": 398}
+{"timesteps_total": 478800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 97041.268, "num_steps_sampled": 478800, "update_time_ms": 2.524, "num_steps_trained": 478800, "load_time_ms": 0.626, "default": {"kl": 0.01445402018725872, "cur_lr": 4.999999873689376e-05, "entropy": 9.523843765258789, "total_loss": 20.581594467163086, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12159392237663269, "vf_explained_var": 0.9837309718132019, "vf_loss": 20.68855094909668}, "grad_time_ms": 734.791}, "pid": 3934253, "time_total_s": 41902.04425191879, "episode_reward_mean": -153.46935723412918, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -178.8284532302404, "policy_reward_mean": {}, "episodes_total": 9576, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -148.82691292199615, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-14-58", "training_iteration": 399, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756512898, "episode_len_mean": 50.0, "timesteps_since_restore": 478800, "time_since_restore": 41902.04425191879, "time_this_iter_s": 87.13865542411804, "iterations_since_restore": 399}
+{"timesteps_total": 480000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 96533.831, "num_steps_sampled": 480000, "update_time_ms": 2.47, "num_steps_trained": 480000, "load_time_ms": 0.622, "default": {"kl": 0.01431234646588564, "cur_lr": 4.999999873689376e-05, "entropy": 9.579992294311523, "total_loss": 22.560794830322266, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12019583582878113, "vf_explained_var": 0.9842327833175659, "vf_loss": 22.666500091552734}, "grad_time_ms": 707.123}, "pid": 3934253, "time_total_s": 41995.76532769203, "episode_reward_mean": -153.08571561344462, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -178.8284532302404, "policy_reward_mean": {}, "episodes_total": 9600, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -148.82691292199615, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-16-32", "training_iteration": 400, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756512992, "episode_len_mean": 50.0, "timesteps_since_restore": 480000, "time_since_restore": 41995.76532769203, "time_this_iter_s": 93.72107577323914, "iterations_since_restore": 400}
+{"timesteps_total": 481200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95440.939, "num_steps_sampled": 481200, "update_time_ms": 2.412, "num_steps_trained": 481200, "load_time_ms": 0.627, "default": {"kl": 0.01310575008392334, "cur_lr": 4.999999873689376e-05, "entropy": 9.719040870666504, "total_loss": 35.705787658691406, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13067464530467987, "vf_explained_var": 0.9781382083892822, "vf_loss": 35.82319641113281}, "grad_time_ms": 720.716}, "pid": 3934253, "time_total_s": 42099.95502829552, "episode_reward_mean": -153.3988099397184, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -178.8284532302404, "policy_reward_mean": {}, "episodes_total": 9624, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -145.95915465653817, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-18-16", "training_iteration": 401, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756513096, "episode_len_mean": 50.0, "timesteps_since_restore": 481200, "time_since_restore": 42099.95502829552, "time_this_iter_s": 104.18970060348511, "iterations_since_restore": 401}
+{"timesteps_total": 482400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94308.325, "num_steps_sampled": 482400, "update_time_ms": 2.409, "num_steps_trained": 482400, "load_time_ms": 0.62, "default": {"kl": 0.013833809643983841, "cur_lr": 4.999999873689376e-05, "entropy": 9.736509323120117, "total_loss": 35.20651626586914, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1259187012910843, "vf_explained_var": 0.9735874533653259, "vf_loss": 35.31842803955078}, "grad_time_ms": 732.584}, "pid": 3934253, "time_total_s": 42183.0499727726, "episode_reward_mean": -152.7485907641062, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -172.2784810744398, "policy_reward_mean": {}, "episodes_total": 9648, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -136.54575402752465, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-19-39", "training_iteration": 402, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756513179, "episode_len_mean": 50.0, "timesteps_since_restore": 482400, "time_since_restore": 42183.0499727726, "time_this_iter_s": 83.0949444770813, "iterations_since_restore": 402}
+{"timesteps_total": 483600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94824.157, "num_steps_sampled": 483600, "update_time_ms": 2.383, "num_steps_trained": 483600, "load_time_ms": 0.614, "default": {"kl": 0.012754004448652267, "cur_lr": 4.999999873689376e-05, "entropy": 9.690858840942383, "total_loss": 21.811321258544922, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1276492029428482, "vf_explained_var": 0.9835841655731201, "vf_loss": 21.926057815551758}, "grad_time_ms": 713.208}, "pid": 3934253, "time_total_s": 42295.01004576683, "episode_reward_mean": -152.46891549800432, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.81996427857436, "policy_reward_mean": {}, "episodes_total": 9672, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -136.54575402752465, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-21-31", "training_iteration": 403, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756513291, "episode_len_mean": 50.0, "timesteps_since_restore": 483600, "time_since_restore": 42295.01004576683, "time_this_iter_s": 111.96007299423218, "iterations_since_restore": 403}
+{"timesteps_total": 484800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 97164.078, "num_steps_sampled": 484800, "update_time_ms": 2.413, "num_steps_trained": 484800, "load_time_ms": 0.609, "default": {"kl": 0.014857407659292221, "cur_lr": 4.999999873689376e-05, "entropy": 9.539432525634766, "total_loss": 26.76873207092285, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13504831492900848, "vf_explained_var": 0.9832797646522522, "vf_loss": 26.888736724853516}, "grad_time_ms": 691.336}, "pid": 3934253, "time_total_s": 42407.1293554306, "episode_reward_mean": -152.53595371969553, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.81996427857436, "policy_reward_mean": {}, "episodes_total": 9696, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -136.54575402752465, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-23-23", "training_iteration": 404, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756513403, "episode_len_mean": 50.0, "timesteps_since_restore": 484800, "time_since_restore": 42407.1293554306, "time_this_iter_s": 112.11930966377258, "iterations_since_restore": 404}
+{"timesteps_total": 486000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 98961.935, "num_steps_sampled": 486000, "update_time_ms": 2.452, "num_steps_trained": 486000, "load_time_ms": 0.613, "default": {"kl": 0.012585025280714035, "cur_lr": 4.999999873689376e-05, "entropy": 9.700153350830078, "total_loss": 33.97825622558594, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11734248697757721, "vf_explained_var": 0.9767182469367981, "vf_loss": 34.08285903930664}, "grad_time_ms": 706.634}, "pid": 3934253, "time_total_s": 42506.60624504089, "episode_reward_mean": -152.02951228995173, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.54826698210027, "policy_reward_mean": {}, "episodes_total": 9720, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -136.54575402752465, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-25-03", "training_iteration": 405, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756513503, "episode_len_mean": 50.0, "timesteps_since_restore": 486000, "time_since_restore": 42506.60624504089, "time_this_iter_s": 99.47688961029053, "iterations_since_restore": 405}
+{"timesteps_total": 487200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 97683.436, "num_steps_sampled": 487200, "update_time_ms": 2.47, "num_steps_trained": 487200, "load_time_ms": 0.605, "default": {"kl": 0.012815814465284348, "cur_lr": 4.999999873689376e-05, "entropy": 9.51749324798584, "total_loss": 18.358110427856445, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13039404153823853, "vf_explained_var": 0.9866368770599365, "vf_loss": 18.475528717041016}, "grad_time_ms": 721.45}, "pid": 3934253, "time_total_s": 42608.96180129051, "episode_reward_mean": -152.5057609427979, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.98823848315914, "policy_reward_mean": {}, "episodes_total": 9744, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -145.99157178352348, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-26-45", "training_iteration": 406, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756513605, "episode_len_mean": 50.0, "timesteps_since_restore": 487200, "time_since_restore": 42608.96180129051, "time_this_iter_s": 102.35555624961853, "iterations_since_restore": 406}
+{"timesteps_total": 488400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 97026.104, "num_steps_sampled": 488400, "update_time_ms": 2.467, "num_steps_trained": 488400, "load_time_ms": 0.609, "default": {"kl": 0.013667297549545765, "cur_lr": 4.999999873689376e-05, "entropy": 9.660782814025879, "total_loss": 34.40043258666992, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.124124675989151, "vf_explained_var": 0.9725708365440369, "vf_loss": 34.510719299316406}, "grad_time_ms": 729.874}, "pid": 3934253, "time_total_s": 42716.45296001434, "episode_reward_mean": -152.4718104965969, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.98823848315914, "policy_reward_mean": {}, "episodes_total": 9768, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.33162856010452, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-28-33", "training_iteration": 407, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756513713, "episode_len_mean": 50.0, "timesteps_since_restore": 488400, "time_since_restore": 42716.45296001434, "time_this_iter_s": 107.49115872383118, "iterations_since_restore": 407}
+{"timesteps_total": 489600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 98824.481, "num_steps_sampled": 489600, "update_time_ms": 2.472, "num_steps_trained": 489600, "load_time_ms": 0.609, "default": {"kl": 0.013919343240559101, "cur_lr": 4.999999873689376e-05, "entropy": 9.630985260009766, "total_loss": 16.17458152770996, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11430396139621735, "vf_explained_var": 0.9877437949180603, "vf_loss": 16.274789810180664}, "grad_time_ms": 718.413}, "pid": 3934253, "time_total_s": 42810.41572546959, "episode_reward_mean": -152.49322413360747, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.22391862857077, "policy_reward_mean": {}, "episodes_total": 9792, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.33162856010452, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-30-07", "training_iteration": 408, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756513807, "episode_len_mean": 50.0, "timesteps_since_restore": 489600, "time_since_restore": 42810.41572546959, "time_this_iter_s": 93.96276545524597, "iterations_since_restore": 408}
+{"timesteps_total": 490800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 100598.573, "num_steps_sampled": 490800, "update_time_ms": 2.456, "num_steps_trained": 490800, "load_time_ms": 0.604, "default": {"kl": 0.013683994300663471, "cur_lr": 4.999999873689376e-05, "entropy": 9.548572540283203, "total_loss": 19.05156898498535, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1258140206336975, "vf_explained_var": 0.9866318106651306, "vf_loss": 19.163530349731445}, "grad_time_ms": 716.211}, "pid": 3934253, "time_total_s": 42915.273431539536, "episode_reward_mean": -152.54198270127512, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -171.02813922101154, "policy_reward_mean": {}, "episodes_total": 9816, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.7902382364414, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-31-52", "training_iteration": 409, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756513912, "episode_len_mean": 50.0, "timesteps_since_restore": 490800, "time_since_restore": 42915.273431539536, "time_this_iter_s": 104.85770606994629, "iterations_since_restore": 409}
+{"timesteps_total": 492000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 99761.434, "num_steps_sampled": 492000, "update_time_ms": 2.489, "num_steps_trained": 492000, "load_time_ms": 0.601, "default": {"kl": 0.013874795287847519, "cur_lr": 4.999999873689376e-05, "entropy": 9.748285293579102, "total_loss": 21.56228256225586, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12434862554073334, "vf_explained_var": 0.9823559522628784, "vf_loss": 21.672584533691406}, "grad_time_ms": 734.541}, "pid": 3934253, "time_total_s": 43000.80782318115, "episode_reward_mean": -152.27884884345352, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -171.02813922101154, "policy_reward_mean": {}, "episodes_total": 9840, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.7902382364414, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-33-17", "training_iteration": 410, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756513997, "episode_len_mean": 50.0, "timesteps_since_restore": 492000, "time_since_restore": 43000.80782318115, "time_this_iter_s": 85.53439164161682, "iterations_since_restore": 410}
+{"timesteps_total": 493200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 98640.007, "num_steps_sampled": 493200, "update_time_ms": 2.536, "num_steps_trained": 493200, "load_time_ms": 0.617, "default": {"kl": 0.013062255457043648, "cur_lr": 4.999999873689376e-05, "entropy": 9.480387687683105, "total_loss": 23.92295265197754, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14083310961723328, "vf_explained_var": 0.9820898771286011, "vf_loss": 24.050559997558594}, "grad_time_ms": 731.141}, "pid": 3934253, "time_total_s": 43093.75035619736, "episode_reward_mean": -152.388119586282, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -171.02813922101154, "policy_reward_mean": {}, "episodes_total": 9864, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.7902382364414, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-34-50", "training_iteration": 411, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756514090, "episode_len_mean": 50.0, "timesteps_since_restore": 493200, "time_since_restore": 43093.75035619736, "time_this_iter_s": 92.94253301620483, "iterations_since_restore": 411}
+{"timesteps_total": 494400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 100133.017, "num_steps_sampled": 494400, "update_time_ms": 2.551, "num_steps_trained": 494400, "load_time_ms": 0.62, "default": {"kl": 0.01390067394822836, "cur_lr": 4.999999873689376e-05, "entropy": 9.697103500366211, "total_loss": 25.10484504699707, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13179221749305725, "vf_explained_var": 0.9813117980957031, "vf_loss": 25.22256088256836}, "grad_time_ms": 732.214}, "pid": 3934253, "time_total_s": 43191.7867565155, "episode_reward_mean": -152.50038821208054, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -171.02813922101154, "policy_reward_mean": {}, "episodes_total": 9888, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.7902382364414, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-36-28", "training_iteration": 412, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756514188, "episode_len_mean": 50.0, "timesteps_since_restore": 494400, "time_since_restore": 43191.7867565155, "time_this_iter_s": 98.03640031814575, "iterations_since_restore": 412}
+{"timesteps_total": 495600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 101028.588, "num_steps_sampled": 495600, "update_time_ms": 2.505, "num_steps_trained": 495600, "load_time_ms": 0.622, "default": {"kl": 0.014487986452877522, "cur_lr": 4.999999873689376e-05, "entropy": 9.295341491699219, "total_loss": 20.39866065979004, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12015184760093689, "vf_explained_var": 0.9833239316940308, "vf_loss": 20.5041446685791}, "grad_time_ms": 747.509}, "pid": 3934253, "time_total_s": 43312.855503320694, "episode_reward_mean": -152.38164456540886, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.1559509614097, "policy_reward_mean": {}, "episodes_total": 9912, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -135.364826567015, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-38-29", "training_iteration": 413, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756514309, "episode_len_mean": 50.0, "timesteps_since_restore": 495600, "time_since_restore": 43312.855503320694, "time_this_iter_s": 121.06874680519104, "iterations_since_restore": 413}
+{"timesteps_total": 496800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 99359.069, "num_steps_sampled": 496800, "update_time_ms": 2.459, "num_steps_trained": 496800, "load_time_ms": 0.636, "default": {"kl": 0.014094003476202488, "cur_lr": 4.999999873689376e-05, "entropy": 9.570438385009766, "total_loss": 25.21484375, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13121233880519867, "vf_explained_var": 0.9804350733757019, "vf_loss": 25.331787109375}, "grad_time_ms": 760.267}, "pid": 3934253, "time_total_s": 43408.40692996979, "episode_reward_mean": -152.84416168800163, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.1559509614097, "policy_reward_mean": {}, "episodes_total": 9936, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -135.364826567015, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-40-05", "training_iteration": 414, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756514405, "episode_len_mean": 50.0, "timesteps_since_restore": 496800, "time_since_restore": 43408.40692996979, "time_this_iter_s": 95.55142664909363, "iterations_since_restore": 414}
+{"timesteps_total": 498000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 99718.859, "num_steps_sampled": 498000, "update_time_ms": 2.471, "num_steps_trained": 498000, "load_time_ms": 0.634, "default": {"kl": 0.013480665162205696, "cur_lr": 4.999999873689376e-05, "entropy": 9.567373275756836, "total_loss": 14.828624725341797, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12480054795742035, "vf_explained_var": 0.9876997470855713, "vf_loss": 14.939777374267578}, "grad_time_ms": 757.832}, "pid": 3934253, "time_total_s": 43511.4573700428, "episode_reward_mean": -152.41611989014177, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.1559509614097, "policy_reward_mean": {}, "episodes_total": 9960, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -135.364826567015, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-41-48", "training_iteration": 415, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756514508, "episode_len_mean": 50.0, "timesteps_since_restore": 498000, "time_since_restore": 43511.4573700428, "time_this_iter_s": 103.0504400730133, "iterations_since_restore": 415}
+{"timesteps_total": 499200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 99487.673, "num_steps_sampled": 499200, "update_time_ms": 2.494, "num_steps_trained": 499200, "load_time_ms": 0.671, "default": {"kl": 0.014436847530305386, "cur_lr": 4.999999873689376e-05, "entropy": 9.49398422241211, "total_loss": 21.52405548095703, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13773185014724731, "vf_explained_var": 0.9835493564605713, "vf_loss": 21.64716911315918}, "grad_time_ms": 762.62}, "pid": 3934253, "time_total_s": 43611.55019903183, "episode_reward_mean": -152.60514110878364, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.84073942014268, "policy_reward_mean": {}, "episodes_total": 9984, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -135.364826567015, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-43-28", "training_iteration": 416, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756514608, "episode_len_mean": 50.0, "timesteps_since_restore": 499200, "time_since_restore": 43611.55019903183, "time_this_iter_s": 100.09282898902893, "iterations_since_restore": 416}
+{"timesteps_total": 500400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 98961.509, "num_steps_sampled": 500400, "update_time_ms": 2.496, "num_steps_trained": 500400, "load_time_ms": 0.673, "default": {"kl": 0.013998076319694519, "cur_lr": 4.999999873689376e-05, "entropy": 9.702149391174316, "total_loss": 27.09682846069336, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14793071150779724, "vf_explained_var": 0.980864405632019, "vf_loss": 27.2305850982666}, "grad_time_ms": 763.849}, "pid": 3934253, "time_total_s": 43713.79194974899, "episode_reward_mean": -152.75236425338213, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.84073942014268, "policy_reward_mean": {}, "episodes_total": 10008, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -140.077182822348, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-45-10", "training_iteration": 417, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756514710, "episode_len_mean": 50.0, "timesteps_since_restore": 500400, "time_since_restore": 43713.79194974899, "time_this_iter_s": 102.24175071716309, "iterations_since_restore": 417}
+{"timesteps_total": 501600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 99558.243, "num_steps_sampled": 501600, "update_time_ms": 2.51, "num_steps_trained": 501600, "load_time_ms": 0.663, "default": {"kl": 0.014370894990861416, "cur_lr": 4.999999873689376e-05, "entropy": 9.21036434173584, "total_loss": 20.671241760253906, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1176142692565918, "vf_explained_var": 0.9830334782600403, "vf_loss": 20.77430534362793}, "grad_time_ms": 759.136}, "pid": 3934253, "time_total_s": 43813.67440891266, "episode_reward_mean": -152.36210487112976, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.61907491036374, "policy_reward_mean": {}, "episodes_total": 10032, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -140.077182822348, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-46-50", "training_iteration": 418, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756514810, "episode_len_mean": 50.0, "timesteps_since_restore": 501600, "time_since_restore": 43813.67440891266, "time_this_iter_s": 99.88245916366577, "iterations_since_restore": 418}
+{"timesteps_total": 502800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 99253.648, "num_steps_sampled": 502800, "update_time_ms": 2.526, "num_steps_trained": 502800, "load_time_ms": 0.66, "default": {"kl": 0.011671670712530613, "cur_lr": 4.999999873689376e-05, "entropy": 9.367462158203125, "total_loss": 23.715415954589844, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12150892615318298, "vf_explained_var": 0.9807304739952087, "vf_loss": 23.825103759765625}, "grad_time_ms": 753.453}, "pid": 3934253, "time_total_s": 43915.42871594429, "episode_reward_mean": -152.4740142506281, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.61907491036374, "policy_reward_mean": {}, "episodes_total": 10056, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -140.077182822348, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-48-32", "training_iteration": 419, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756514912, "episode_len_mean": 50.0, "timesteps_since_restore": 502800, "time_since_restore": 43915.42871594429, "time_this_iter_s": 101.75430703163147, "iterations_since_restore": 419}
+{"timesteps_total": 504000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 101192.681, "num_steps_sampled": 504000, "update_time_ms": 2.561, "num_steps_trained": 504000, "load_time_ms": 0.663, "default": {"kl": 0.013619640842080116, "cur_lr": 4.999999873689376e-05, "entropy": 9.553508758544922, "total_loss": 19.23631477355957, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11064037680625916, "vf_explained_var": 0.9852237701416016, "vf_loss": 19.333168029785156}, "grad_time_ms": 752.56}, "pid": 3934253, "time_total_s": 44020.34438610077, "episode_reward_mean": -152.088058321042, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.61907491036374, "policy_reward_mean": {}, "episodes_total": 10080, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -140.077182822348, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-50-17", "training_iteration": 420, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756515017, "episode_len_mean": 50.0, "timesteps_since_restore": 504000, "time_since_restore": 44020.34438610077, "time_this_iter_s": 104.91567015647888, "iterations_since_restore": 420}
+{"timesteps_total": 505200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 102467.95, "num_steps_sampled": 505200, "update_time_ms": 2.496, "num_steps_trained": 505200, "load_time_ms": 0.65, "default": {"kl": 0.01381174847483635, "cur_lr": 4.999999873689376e-05, "entropy": 9.478511810302734, "total_loss": 16.577302932739258, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1256970316171646, "vf_explained_var": 0.9875710606575012, "vf_loss": 16.689016342163086}, "grad_time_ms": 756.674}, "pid": 3934253, "time_total_s": 44126.080137491226, "episode_reward_mean": -152.20027245584026, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -163.9500105131882, "policy_reward_mean": {}, "episodes_total": 10104, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.6992763566649, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-52-03", "training_iteration": 421, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756515123, "episode_len_mean": 50.0, "timesteps_since_restore": 505200, "time_since_restore": 44126.080137491226, "time_this_iter_s": 105.73575139045715, "iterations_since_restore": 421}
+{"timesteps_total": 506400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 100725.323, "num_steps_sampled": 506400, "update_time_ms": 2.505, "num_steps_trained": 506400, "load_time_ms": 0.655, "default": {"kl": 0.014820229262113571, "cur_lr": 4.999999873689376e-05, "entropy": 9.623552322387695, "total_loss": 15.060821533203125, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12630988657474518, "vf_explained_var": 0.9873186945915222, "vf_loss": 15.172125816345215}, "grad_time_ms": 768.013}, "pid": 3934253, "time_total_s": 44206.80386471748, "episode_reward_mean": -152.1666959661188, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.13063243563758, "policy_reward_mean": {}, "episodes_total": 10128, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.6992763566649, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-53-23", "training_iteration": 422, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756515203, "episode_len_mean": 50.0, "timesteps_since_restore": 506400, "time_since_restore": 44206.80386471748, "time_this_iter_s": 80.72372722625732, "iterations_since_restore": 422}
+{"timesteps_total": 507600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 99141.36, "num_steps_sampled": 507600, "update_time_ms": 2.558, "num_steps_trained": 507600, "load_time_ms": 0.662, "default": {"kl": 0.014279918745160103, "cur_lr": 4.999999873689376e-05, "entropy": 9.518680572509766, "total_loss": 20.14760398864746, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12270474433898926, "vf_explained_var": 0.9837811589241028, "vf_loss": 20.255849838256836}, "grad_time_ms": 760.582}, "pid": 3934253, "time_total_s": 44311.95928025246, "episode_reward_mean": -152.37089182857787, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.13063243563758, "policy_reward_mean": {}, "episodes_total": 10152, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.6992763566649, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-55-09", "training_iteration": 423, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756515309, "episode_len_mean": 50.0, "timesteps_since_restore": 507600, "time_since_restore": 44311.95928025246, "time_this_iter_s": 105.15541553497314, "iterations_since_restore": 423}
+{"timesteps_total": 508800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 100358.021, "num_steps_sampled": 508800, "update_time_ms": 2.585, "num_steps_trained": 508800, "load_time_ms": 0.663, "default": {"kl": 0.012729505077004433, "cur_lr": 4.999999873689376e-05, "entropy": 9.574199676513672, "total_loss": 24.127349853515625, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12281505018472672, "vf_explained_var": 0.9814075827598572, "vf_loss": 24.237276077270508}, "grad_time_ms": 745.356}, "pid": 3934253, "time_total_s": 44419.52580022812, "episode_reward_mean": -152.83940788648562, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -169.17921882612953, "policy_reward_mean": {}, "episodes_total": 10176, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.74108753127996, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-56-56", "training_iteration": 424, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756515416, "episode_len_mean": 50.0, "timesteps_since_restore": 508800, "time_since_restore": 44419.52580022812, "time_this_iter_s": 107.56651997566223, "iterations_since_restore": 424}
+{"timesteps_total": 510000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 96418.653, "num_steps_sampled": 510000, "update_time_ms": 2.585, "num_steps_trained": 510000, "load_time_ms": 0.661, "default": {"kl": 0.014946307986974716, "cur_lr": 4.999999873689376e-05, "entropy": 9.48218822479248, "total_loss": 16.20340919494629, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14104242622852325, "vf_explained_var": 0.9878532886505127, "vf_loss": 16.32931900024414}, "grad_time_ms": 749.272}, "pid": 3934253, "time_total_s": 44483.22181510925, "episode_reward_mean": -152.96896037243326, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -169.17921882612953, "policy_reward_mean": {}, "episodes_total": 10200, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.74108753127996, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-58-00", "training_iteration": 425, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756515480, "episode_len_mean": 50.0, "timesteps_since_restore": 510000, "time_since_restore": 44483.22181510925, "time_this_iter_s": 63.69601488113403, "iterations_since_restore": 425}
+{"timesteps_total": 511200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94771.723, "num_steps_sampled": 511200, "update_time_ms": 2.541, "num_steps_trained": 511200, "load_time_ms": 0.625, "default": {"kl": 0.013613752089440823, "cur_lr": 4.999999873689376e-05, "entropy": 9.491787910461426, "total_loss": 31.104705810546875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13406051695346832, "vf_explained_var": 0.9770567417144775, "vf_loss": 31.22498321533203}, "grad_time_ms": 752.077}, "pid": 3934253, "time_total_s": 44566.871950387955, "episode_reward_mean": -153.15485623507504, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -176.3212741594545, "policy_reward_mean": {}, "episodes_total": 10224, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.76878927498908, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_02-59-24", "training_iteration": 426, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756515564, "episode_len_mean": 50.0, "timesteps_since_restore": 511200, "time_since_restore": 44566.871950387955, "time_this_iter_s": 83.65013527870178, "iterations_since_restore": 426}
+{"timesteps_total": 512400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92743.774, "num_steps_sampled": 512400, "update_time_ms": 2.568, "num_steps_trained": 512400, "load_time_ms": 0.622, "default": {"kl": 0.01447269693017006, "cur_lr": 4.999999873689376e-05, "entropy": 9.206316947937012, "total_loss": 16.799468994140625, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12001865357160568, "vf_explained_var": 0.9866235256195068, "vf_loss": 16.904834747314453}, "grad_time_ms": 740.284}, "pid": 3934253, "time_total_s": 44648.71591639519, "episode_reward_mean": -153.16727095351285, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -176.3212741594545, "policy_reward_mean": {}, "episodes_total": 10248, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.76878927498908, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-00-45", "training_iteration": 427, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756515645, "episode_len_mean": 50.0, "timesteps_since_restore": 512400, "time_since_restore": 44648.71591639519, "time_this_iter_s": 81.84396600723267, "iterations_since_restore": 427}
+{"timesteps_total": 513600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93115.004, "num_steps_sampled": 513600, "update_time_ms": 2.549, "num_steps_trained": 513600, "load_time_ms": 0.659, "default": {"kl": 0.015185288153588772, "cur_lr": 4.999999873689376e-05, "entropy": 9.132720947265625, "total_loss": 31.58395004272461, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1356636881828308, "vf_explained_var": 0.9850590825080872, "vf_loss": 31.70423698425293}, "grad_time_ms": 745.147}, "pid": 3934253, "time_total_s": 44752.36105489731, "episode_reward_mean": -152.47133940099715, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -176.3212741594545, "policy_reward_mean": {}, "episodes_total": 10272, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -136.65559761541954, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-02-29", "training_iteration": 428, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756515749, "episode_len_mean": 50.0, "timesteps_since_restore": 513600, "time_since_restore": 44752.36105489731, "time_this_iter_s": 103.64513850212097, "iterations_since_restore": 428}
+{"timesteps_total": 514800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93212.02, "num_steps_sampled": 514800, "update_time_ms": 2.565, "num_steps_trained": 514800, "load_time_ms": 0.659, "default": {"kl": 0.012950624339282513, "cur_lr": 4.999999873689376e-05, "entropy": 9.307674407958984, "total_loss": 20.67955780029297, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1103539988398552, "vf_explained_var": 0.983638346195221, "vf_loss": 20.77680015563965}, "grad_time_ms": 755.405}, "pid": 3934253, "time_total_s": 44855.18939137459, "episode_reward_mean": -152.44805865292292, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -176.3212741594545, "policy_reward_mean": {}, "episodes_total": 10296, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -136.65559761541954, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-04-12", "training_iteration": 429, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756515852, "episode_len_mean": 50.0, "timesteps_since_restore": 514800, "time_since_restore": 44855.18939137459, "time_this_iter_s": 102.82833647727966, "iterations_since_restore": 429}
+{"timesteps_total": 516000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 91066.479, "num_steps_sampled": 516000, "update_time_ms": 2.577, "num_steps_trained": 516000, "load_time_ms": 0.662, "default": {"kl": 0.014143170788884163, "cur_lr": 4.999999873689376e-05, "entropy": 9.144468307495117, "total_loss": 20.466529846191406, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12884706258773804, "vf_explained_var": 0.9840419292449951, "vf_loss": 20.5810546875}, "grad_time_ms": 760.555}, "pid": 3934253, "time_total_s": 44938.701545238495, "episode_reward_mean": -152.37429926190597, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.15191290750363, "policy_reward_mean": {}, "episodes_total": 10320, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -136.65559761541954, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-05-35", "training_iteration": 430, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756515935, "episode_len_mean": 50.0, "timesteps_since_restore": 516000, "time_since_restore": 44938.701545238495, "time_this_iter_s": 83.51215386390686, "iterations_since_restore": 430}
+{"timesteps_total": 517200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 87798.525, "num_steps_sampled": 517200, "update_time_ms": 2.592, "num_steps_trained": 517200, "load_time_ms": 0.659, "default": {"kl": 0.012897643260657787, "cur_lr": 4.999999873689376e-05, "entropy": 9.147050857543945, "total_loss": 23.396202087402344, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12415405362844467, "vf_explained_var": 0.9816821217536926, "vf_loss": 23.50729751586914}, "grad_time_ms": 753.518}, "pid": 3934253, "time_total_s": 45011.686506032944, "episode_reward_mean": -152.49162581204905, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.15191290750363, "policy_reward_mean": {}, "episodes_total": 10344, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -136.65559761541954, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-06-49", "training_iteration": 431, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756516009, "episode_len_mean": 50.0, "timesteps_since_restore": 517200, "time_since_restore": 45011.686506032944, "time_this_iter_s": 72.98496079444885, "iterations_since_restore": 431}
+{"timesteps_total": 518400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 87706.415, "num_steps_sampled": 518400, "update_time_ms": 2.562, "num_steps_trained": 518400, "load_time_ms": 0.67, "default": {"kl": 0.01292494498193264, "cur_lr": 4.999999873689376e-05, "entropy": 9.16193675994873, "total_loss": 22.140846252441406, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12487272173166275, "vf_explained_var": 0.982728123664856, "vf_loss": 22.252634048461914}, "grad_time_ms": 750.037}, "pid": 3934253, "time_total_s": 45091.453741550446, "episode_reward_mean": -152.63505965031305, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.15191290750363, "policy_reward_mean": {}, "episodes_total": 10368, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -136.65559761541954, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-08-08", "training_iteration": 432, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756516088, "episode_len_mean": 50.0, "timesteps_since_restore": 518400, "time_since_restore": 45091.453741550446, "time_this_iter_s": 79.76723551750183, "iterations_since_restore": 432}
+{"timesteps_total": 519600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 86294.257, "num_steps_sampled": 519600, "update_time_ms": 2.543, "num_steps_trained": 519600, "load_time_ms": 0.697, "default": {"kl": 0.014303537085652351, "cur_lr": 4.999999873689376e-05, "entropy": 9.481611251831055, "total_loss": 28.225297927856445, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13561999797821045, "vf_explained_var": 0.9792253971099854, "vf_loss": 28.346435546875}, "grad_time_ms": 753.54}, "pid": 3934253, "time_total_s": 45182.52351999283, "episode_reward_mean": -152.83210305416438, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.15191290750363, "policy_reward_mean": {}, "episodes_total": 10392, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.56123354539693, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-09-39", "training_iteration": 433, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756516179, "episode_len_mean": 50.0, "timesteps_since_restore": 519600, "time_since_restore": 45182.52351999283, "time_this_iter_s": 91.06977844238281, "iterations_since_restore": 433}
+{"timesteps_total": 520800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 87916.656, "num_steps_sampled": 520800, "update_time_ms": 2.533, "num_steps_trained": 520800, "load_time_ms": 0.692, "default": {"kl": 0.013387994840741158, "cur_lr": 4.999999873689376e-05, "entropy": 9.373644828796387, "total_loss": 35.60469055175781, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1338808387517929, "vf_explained_var": 0.9731928706169128, "vf_loss": 35.72500991821289}, "grad_time_ms": 779.642}, "pid": 3934253, "time_total_s": 45306.57466197014, "episode_reward_mean": -152.94840318610505, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.67520643826325, "policy_reward_mean": {}, "episodes_total": 10416, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.56123354539693, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-11-43", "training_iteration": 434, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756516303, "episode_len_mean": 50.0, "timesteps_since_restore": 520800, "time_since_restore": 45306.57466197014, "time_this_iter_s": 124.05114197731018, "iterations_since_restore": 434}
+{"timesteps_total": 522000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 90730.781, "num_steps_sampled": 522000, "update_time_ms": 2.591, "num_steps_trained": 522000, "load_time_ms": 0.694, "default": {"kl": 0.013694589026272297, "cur_lr": 4.999999873689376e-05, "entropy": 9.236662864685059, "total_loss": 25.01688003540039, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12344694137573242, "vf_explained_var": 0.9816955924034119, "vf_loss": 25.126461029052734}, "grad_time_ms": 775.417}, "pid": 3934253, "time_total_s": 45398.37049865723, "episode_reward_mean": -153.29016551966026, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.39962864199666, "policy_reward_mean": {}, "episodes_total": 10440, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.56123354539693, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-13-15", "training_iteration": 435, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756516395, "episode_len_mean": 50.0, "timesteps_since_restore": 522000, "time_since_restore": 45398.37049865723, "time_this_iter_s": 91.79583668708801, "iterations_since_restore": 435}
+{"timesteps_total": 523200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92707.667, "num_steps_sampled": 523200, "update_time_ms": 2.606, "num_steps_trained": 523200, "load_time_ms": 0.696, "default": {"kl": 0.015608757734298706, "cur_lr": 4.999999873689376e-05, "entropy": 9.019153594970703, "total_loss": 14.81684684753418, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13161876797676086, "vf_explained_var": 0.9873616099357605, "vf_loss": 14.932661056518555}, "grad_time_ms": 775.044}, "pid": 3934253, "time_total_s": 45501.78622722626, "episode_reward_mean": -153.08923072182094, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.39962864199666, "policy_reward_mean": {}, "episodes_total": 10464, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.56123354539693, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-14-59", "training_iteration": 436, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756516499, "episode_len_mean": 50.0, "timesteps_since_restore": 523200, "time_since_restore": 45501.78622722626, "time_this_iter_s": 103.41572856903076, "iterations_since_restore": 436}
+{"timesteps_total": 524400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93646.448, "num_steps_sampled": 524400, "update_time_ms": 2.68, "num_steps_trained": 524400, "load_time_ms": 0.696, "default": {"kl": 0.01425144076347351, "cur_lr": 4.999999873689376e-05, "entropy": 9.15731430053711, "total_loss": 30.321533203125, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1165459007024765, "vf_explained_var": 0.976507306098938, "vf_loss": 30.423648834228516}, "grad_time_ms": 775.914}, "pid": 3934253, "time_total_s": 45593.028044462204, "episode_reward_mean": -153.07520775521334, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.39962864199666, "policy_reward_mean": {}, "episodes_total": 10488, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -147.22449379964385, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-16-30", "training_iteration": 437, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756516590, "episode_len_mean": 50.0, "timesteps_since_restore": 524400, "time_since_restore": 45593.028044462204, "time_this_iter_s": 91.24181723594666, "iterations_since_restore": 437}
+{"timesteps_total": 525600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 91598.038, "num_steps_sampled": 525600, "update_time_ms": 2.7, "num_steps_trained": 525600, "load_time_ms": 0.683, "default": {"kl": 0.014275365509092808, "cur_lr": 4.999999873689376e-05, "entropy": 9.111166000366211, "total_loss": 21.981903076171875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13030636310577393, "vf_explained_var": 0.9825233817100525, "vf_loss": 22.097755432128906}, "grad_time_ms": 775.419}, "pid": 3934253, "time_total_s": 45676.183108091354, "episode_reward_mean": -152.90180067816212, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.3678219403425, "policy_reward_mean": {}, "episodes_total": 10512, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.91137618987028, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-17-53", "training_iteration": 438, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756516673, "episode_len_mean": 50.0, "timesteps_since_restore": 525600, "time_since_restore": 45676.183108091354, "time_this_iter_s": 83.15506362915039, "iterations_since_restore": 438}
+{"timesteps_total": 526800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92189.35, "num_steps_sampled": 526800, "update_time_ms": 2.637, "num_steps_trained": 526800, "load_time_ms": 0.682, "default": {"kl": 0.014680023305118084, "cur_lr": 4.999999873689376e-05, "entropy": 9.524951934814453, "total_loss": 16.266441345214844, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12172594666481018, "vf_explained_var": 0.9866619110107422, "vf_loss": 16.37330436706543}, "grad_time_ms": 764.032}, "pid": 3934253, "time_total_s": 45784.80823278427, "episode_reward_mean": -152.5567861979773, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.3678219403425, "policy_reward_mean": {}, "episodes_total": 10536, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -140.51117984519468, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-19-42", "training_iteration": 439, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756516782, "episode_len_mean": 50.0, "timesteps_since_restore": 526800, "time_since_restore": 45784.80823278427, "time_this_iter_s": 108.62512469291687, "iterations_since_restore": 439}
+{"timesteps_total": 528000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 91884.362, "num_steps_sampled": 528000, "update_time_ms": 2.573, "num_steps_trained": 528000, "load_time_ms": 0.68, "default": {"kl": 0.012988438829779625, "cur_lr": 4.999999873689376e-05, "entropy": 8.752195358276367, "total_loss": 14.445332527160645, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12197425961494446, "vf_explained_var": 0.9885706305503845, "vf_loss": 14.554155349731445}, "grad_time_ms": 769.323}, "pid": 3934253, "time_total_s": 45865.32222414017, "episode_reward_mean": -152.43594000190504, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.3678219403425, "policy_reward_mean": {}, "episodes_total": 10560, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -140.51117984519468, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-21-02", "training_iteration": 440, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756516862, "episode_len_mean": 50.0, "timesteps_since_restore": 528000, "time_since_restore": 45865.32222414017, "time_this_iter_s": 80.513991355896, "iterations_since_restore": 440}
+{"timesteps_total": 529200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92350.812, "num_steps_sampled": 529200, "update_time_ms": 2.622, "num_steps_trained": 529200, "load_time_ms": 0.68, "default": {"kl": 0.014562004245817661, "cur_lr": 4.999999873689376e-05, "entropy": 9.24899673461914, "total_loss": 13.435138702392578, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12867429852485657, "vf_explained_var": 0.9890771508216858, "vf_loss": 13.549068450927734}, "grad_time_ms": 775.906}, "pid": 3934253, "time_total_s": 45943.03843998909, "episode_reward_mean": -152.21569410457278, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.3678219403425, "policy_reward_mean": {}, "episodes_total": 10584, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -140.51117984519468, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-22-20", "training_iteration": 441, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756516940, "episode_len_mean": 50.0, "timesteps_since_restore": 529200, "time_since_restore": 45943.03843998909, "time_this_iter_s": 77.71621584892273, "iterations_since_restore": 441}
+{"timesteps_total": 530400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92189.01, "num_steps_sampled": 530400, "update_time_ms": 2.579, "num_steps_trained": 530400, "load_time_ms": 0.662, "default": {"kl": 0.0120732756331563, "cur_lr": 4.999999873689376e-05, "entropy": 8.865851402282715, "total_loss": 20.69765853881836, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11634629219770432, "vf_explained_var": 0.9846157431602478, "vf_loss": 20.801780700683594}, "grad_time_ms": 769.497}, "pid": 3934253, "time_total_s": 46021.12298822403, "episode_reward_mean": -152.15796697777017, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -163.17462601974535, "policy_reward_mean": {}, "episodes_total": 10608, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -140.51117984519468, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-23-38", "training_iteration": 442, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756517018, "episode_len_mean": 50.0, "timesteps_since_restore": 530400, "time_since_restore": 46021.12298822403, "time_this_iter_s": 78.08454823493958, "iterations_since_restore": 442}
+{"timesteps_total": 531600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92083.776, "num_steps_sampled": 531600, "update_time_ms": 2.534, "num_steps_trained": 531600, "load_time_ms": 0.624, "default": {"kl": 0.012755469419062138, "cur_lr": 4.999999873689376e-05, "entropy": 8.95416259765625, "total_loss": 24.801151275634766, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1164705753326416, "vf_explained_var": 0.979554295539856, "vf_loss": 24.904706954956055}, "grad_time_ms": 776.602}, "pid": 3934253, "time_total_s": 46111.20990753174, "episode_reward_mean": -152.36619517929225, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.71090088526697, "policy_reward_mean": {}, "episodes_total": 10632, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -149.24255595970118, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-25-08", "training_iteration": 443, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756517108, "episode_len_mean": 50.0, "timesteps_since_restore": 531600, "time_since_restore": 46111.20990753174, "time_this_iter_s": 90.08691930770874, "iterations_since_restore": 443}
+{"timesteps_total": 532800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 88291.327, "num_steps_sampled": 532800, "update_time_ms": 2.566, "num_steps_trained": 532800, "load_time_ms": 0.622, "default": {"kl": 0.01175283920019865, "cur_lr": 4.999999873689376e-05, "entropy": 8.902250289916992, "total_loss": 21.50499725341797, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12178131937980652, "vf_explained_var": 0.9841468334197998, "vf_loss": 21.614879608154297}, "grad_time_ms": 754.912}, "pid": 3934253, "time_total_s": 46197.119389534, "episode_reward_mean": -152.58799610219123, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.71090088526697, "policy_reward_mean": {}, "episodes_total": 10656, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -148.19138459858985, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-26-34", "training_iteration": 444, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756517194, "episode_len_mean": 50.0, "timesteps_since_restore": 532800, "time_since_restore": 46197.119389534, "time_this_iter_s": 85.9094820022583, "iterations_since_restore": 444}
+{"timesteps_total": 534000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 88752.555, "num_steps_sampled": 534000, "update_time_ms": 2.491, "num_steps_trained": 534000, "load_time_ms": 0.629, "default": {"kl": 0.014107207767665386, "cur_lr": 4.999999873689376e-05, "entropy": 8.85167407989502, "total_loss": 16.065641403198242, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1293773055076599, "vf_explained_var": 0.9869747161865234, "vf_loss": 16.180734634399414}, "grad_time_ms": 756.331}, "pid": 3934253, "time_total_s": 46293.54178571701, "episode_reward_mean": -152.4441623789422, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.71090088526697, "policy_reward_mean": {}, "episodes_total": 10680, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -148.19138459858985, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-28-11", "training_iteration": 445, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756517291, "episode_len_mean": 50.0, "timesteps_since_restore": 534000, "time_since_restore": 46293.54178571701, "time_this_iter_s": 96.42239618301392, "iterations_since_restore": 445}
+{"timesteps_total": 535200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 87968.815, "num_steps_sampled": 535200, "update_time_ms": 2.489, "num_steps_trained": 535200, "load_time_ms": 0.635, "default": {"kl": 0.014973337762057781, "cur_lr": 4.999999873689376e-05, "entropy": 9.602691650390625, "total_loss": 27.49502182006836, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14575064182281494, "vf_explained_var": 0.97819584608078, "vf_loss": 27.6256103515625}, "grad_time_ms": 764.849}, "pid": 3934253, "time_total_s": 46389.205899477005, "episode_reward_mean": -152.85829004269831, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.8271505938669, "policy_reward_mean": {}, "episodes_total": 10704, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.14836065978687, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-29-46", "training_iteration": 446, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756517386, "episode_len_mean": 50.0, "timesteps_since_restore": 535200, "time_since_restore": 46389.205899477005, "time_this_iter_s": 95.6641137599945, "iterations_since_restore": 446}
+{"timesteps_total": 536400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 87626.363, "num_steps_sampled": 536400, "update_time_ms": 2.417, "num_steps_trained": 536400, "load_time_ms": 0.643, "default": {"kl": 0.013655421324074268, "cur_lr": 4.999999873689376e-05, "entropy": 9.15749740600586, "total_loss": 25.52581787109375, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12191127240657806, "vf_explained_var": 0.9806229472160339, "vf_loss": 25.63390350341797}, "grad_time_ms": 763.984}, "pid": 3934253, "time_total_s": 46477.0133357048, "episode_reward_mean": -152.46147072903958, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.8271505938669, "policy_reward_mean": {}, "episodes_total": 10728, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.14836065978687, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-31-14", "training_iteration": 447, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756517474, "episode_len_mean": 50.0, "timesteps_since_restore": 536400, "time_since_restore": 46477.0133357048, "time_this_iter_s": 87.80743622779846, "iterations_since_restore": 447}
+{"timesteps_total": 537600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 89246.139, "num_steps_sampled": 537600, "update_time_ms": 2.385, "num_steps_trained": 537600, "load_time_ms": 0.627, "default": {"kl": 0.01547261606901884, "cur_lr": 4.999999873689376e-05, "entropy": 8.981388092041016, "total_loss": 20.75351333618164, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12472319602966309, "vf_explained_var": 0.9833012819290161, "vf_loss": 20.86256980895996}, "grad_time_ms": 766.874}, "pid": 3934253, "time_total_s": 46576.39440321922, "episode_reward_mean": -152.82945239896245, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -169.23393279477395, "policy_reward_mean": {}, "episodes_total": 10752, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -135.55991159320467, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-32-54", "training_iteration": 448, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756517574, "episode_len_mean": 50.0, "timesteps_since_restore": 537600, "time_since_restore": 46576.39440321922, "time_this_iter_s": 99.38106751441956, "iterations_since_restore": 448}
+{"timesteps_total": 538800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 86122.555, "num_steps_sampled": 538800, "update_time_ms": 2.412, "num_steps_trained": 538800, "load_time_ms": 0.634, "default": {"kl": 0.013270992785692215, "cur_lr": 4.999999873689376e-05, "entropy": 8.998639106750488, "total_loss": 17.40472412109375, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12813928723335266, "vf_explained_var": 0.9874011278152466, "vf_loss": 17.519426345825195}, "grad_time_ms": 779.815}, "pid": 3934253, "time_total_s": 46653.91430091858, "episode_reward_mean": -152.52047795142636, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -169.23393279477395, "policy_reward_mean": {}, "episodes_total": 10776, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -135.55991159320467, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-34-11", "training_iteration": 449, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756517651, "episode_len_mean": 50.0, "timesteps_since_restore": 538800, "time_since_restore": 46653.91430091858, "time_this_iter_s": 77.51989769935608, "iterations_since_restore": 449}
+{"timesteps_total": 540000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 84794.443, "num_steps_sampled": 540000, "update_time_ms": 2.416, "num_steps_trained": 540000, "load_time_ms": 0.635, "default": {"kl": 0.013216478750109673, "cur_lr": 4.999999873689376e-05, "entropy": 9.117257118225098, "total_loss": 21.370153427124023, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13916881382465363, "vf_explained_var": 0.9849511384963989, "vf_loss": 21.49593734741211}, "grad_time_ms": 779.671}, "pid": 3934253, "time_total_s": 46721.145233392715, "episode_reward_mean": -152.30260405709933, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -169.23393279477395, "policy_reward_mean": {}, "episodes_total": 10800, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -135.55991159320467, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-35-18", "training_iteration": 450, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756517718, "episode_len_mean": 50.0, "timesteps_since_restore": 540000, "time_since_restore": 46721.145233392715, "time_this_iter_s": 67.23093247413635, "iterations_since_restore": 450}
+{"timesteps_total": 541200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 86414.3, "num_steps_sampled": 541200, "update_time_ms": 2.406, "num_steps_trained": 541200, "load_time_ms": 0.637, "default": {"kl": 0.011747285723686218, "cur_lr": 4.999999873689376e-05, "entropy": 8.846776962280273, "total_loss": 35.44596862792969, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12402357161045074, "vf_explained_var": 0.97218257188797, "vf_loss": 35.558101654052734}, "grad_time_ms": 771.091}, "pid": 3934253, "time_total_s": 46814.974937200546, "episode_reward_mean": -152.2823230757062, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -169.23393279477395, "policy_reward_mean": {}, "episodes_total": 10824, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -135.55991159320467, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-36-52", "training_iteration": 451, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756517812, "episode_len_mean": 50.0, "timesteps_since_restore": 541200, "time_since_restore": 46814.974937200546, "time_this_iter_s": 93.82970380783081, "iterations_since_restore": 451}
+{"timesteps_total": 542400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 88969.508, "num_steps_sampled": 542400, "update_time_ms": 2.502, "num_steps_trained": 542400, "load_time_ms": 0.641, "default": {"kl": 0.014603732153773308, "cur_lr": 4.999999873689376e-05, "entropy": 9.174540519714355, "total_loss": 17.92264747619629, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1435505747795105, "vf_explained_var": 0.9859344959259033, "vf_loss": 18.051414489746094}, "grad_time_ms": 775.126}, "pid": 3934253, "time_total_s": 46918.65322470665, "episode_reward_mean": -152.32697364347348, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -169.23393279477395, "policy_reward_mean": {}, "episodes_total": 10848, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -140.8153902235786, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-38-36", "training_iteration": 452, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756517916, "episode_len_mean": 50.0, "timesteps_since_restore": 542400, "time_since_restore": 46918.65322470665, "time_this_iter_s": 103.67828750610352, "iterations_since_restore": 452}
+{"timesteps_total": 543600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 89585.511, "num_steps_sampled": 543600, "update_time_ms": 2.542, "num_steps_trained": 543600, "load_time_ms": 0.653, "default": {"kl": 0.014241022989153862, "cur_lr": 4.999999873689376e-05, "entropy": 8.966078758239746, "total_loss": 21.33045196533203, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12030242383480072, "vf_explained_var": 0.9839779734611511, "vf_loss": 21.43633460998535}, "grad_time_ms": 772.482}, "pid": 3934253, "time_total_s": 47014.87502336502, "episode_reward_mean": -152.05236692518466, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.94974357746918, "policy_reward_mean": {}, "episodes_total": 10872, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -140.8153902235786, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-40-12", "training_iteration": 453, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756518012, "episode_len_mean": 50.0, "timesteps_since_restore": 543600, "time_since_restore": 47014.87502336502, "time_this_iter_s": 96.22179865837097, "iterations_since_restore": 453}
+{"timesteps_total": 544800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 89867.981, "num_steps_sampled": 544800, "update_time_ms": 2.514, "num_steps_trained": 544800, "load_time_ms": 0.654, "default": {"kl": 0.013131268322467804, "cur_lr": 4.999999873689376e-05, "entropy": 8.861820220947266, "total_loss": 20.30666160583496, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1259278655052185, "vf_explained_var": 0.9839560389518738, "vf_loss": 20.419294357299805}, "grad_time_ms": 781.308}, "pid": 3934253, "time_total_s": 47103.69718146324, "episode_reward_mean": -152.21997083426987, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.6812075607711, "policy_reward_mean": {}, "episodes_total": 10896, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.9082405590833, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-41-41", "training_iteration": 454, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756518101, "episode_len_mean": 50.0, "timesteps_since_restore": 544800, "time_since_restore": 47103.69718146324, "time_this_iter_s": 88.82215809822083, "iterations_since_restore": 454}
+{"timesteps_total": 546000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 89019.008, "num_steps_sampled": 546000, "update_time_ms": 2.574, "num_steps_trained": 546000, "load_time_ms": 0.646, "default": {"kl": 0.013202676549553871, "cur_lr": 4.999999873689376e-05, "entropy": 8.931380271911621, "total_loss": 20.238691329956055, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12394557893276215, "vf_explained_var": 0.9842751026153564, "vf_loss": 20.349267959594727}, "grad_time_ms": 776.822}, "pid": 3934253, "time_total_s": 47191.58376741409, "episode_reward_mean": -152.22845068858172, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.6812075607711, "policy_reward_mean": {}, "episodes_total": 10920, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.9082405590833, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-43-09", "training_iteration": 455, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756518189, "episode_len_mean": 50.0, "timesteps_since_restore": 546000, "time_since_restore": 47191.58376741409, "time_this_iter_s": 87.88658595085144, "iterations_since_restore": 455}
+{"timesteps_total": 547200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 87597.737, "num_steps_sampled": 547200, "update_time_ms": 2.56, "num_steps_trained": 547200, "load_time_ms": 0.645, "default": {"kl": 0.013700922951102257, "cur_lr": 4.999999873689376e-05, "entropy": 8.581720352172852, "total_loss": 15.576580047607422, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11471442133188248, "vf_explained_var": 0.9878559112548828, "vf_loss": 15.677420616149902}, "grad_time_ms": 778.745}, "pid": 3934253, "time_total_s": 47273.0536134243, "episode_reward_mean": -151.89341081545788, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.6812075607711, "policy_reward_mean": {}, "episodes_total": 10944, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.9082405590833, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-44-30", "training_iteration": 456, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756518270, "episode_len_mean": 50.0, "timesteps_since_restore": 547200, "time_since_restore": 47273.0536134243, "time_this_iter_s": 81.46984601020813, "iterations_since_restore": 456}
+{"timesteps_total": 548400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 87216.144, "num_steps_sampled": 548400, "update_time_ms": 2.516, "num_steps_trained": 548400, "load_time_ms": 0.642, "default": {"kl": 0.01370406523346901, "cur_lr": 4.999999873689376e-05, "entropy": 8.93803596496582, "total_loss": 12.964447975158691, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13340796530246735, "vf_explained_var": 0.9900917410850525, "vf_loss": 13.083980560302734}, "grad_time_ms": 780.955}, "pid": 3934253, "time_total_s": 47357.06767082214, "episode_reward_mean": -152.15835333609377, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.6812075607711, "policy_reward_mean": {}, "episodes_total": 10968, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -144.85171769932617, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-45-54", "training_iteration": 457, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756518354, "episode_len_mean": 50.0, "timesteps_since_restore": 548400, "time_since_restore": 47357.06767082214, "time_this_iter_s": 84.01405739784241, "iterations_since_restore": 457}
+{"timesteps_total": 549600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 85739.61, "num_steps_sampled": 549600, "update_time_ms": 2.575, "num_steps_trained": 549600, "load_time_ms": 0.634, "default": {"kl": 0.014458566904067993, "cur_lr": 4.999999873689376e-05, "entropy": 9.13646411895752, "total_loss": 24.75263214111328, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1529289335012436, "vf_explained_var": 0.9856938123703003, "vf_loss": 24.89092254638672}, "grad_time_ms": 787.371}, "pid": 3934253, "time_total_s": 47441.748109817505, "episode_reward_mean": -152.45268015512374, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -163.47316165078425, "policy_reward_mean": {}, "episodes_total": 10992, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.83832716227093, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-47-19", "training_iteration": 458, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756518439, "episode_len_mean": 50.0, "timesteps_since_restore": 549600, "time_since_restore": 47441.748109817505, "time_this_iter_s": 84.68043899536133, "iterations_since_restore": 458}
+{"timesteps_total": 550800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 86245.352, "num_steps_sampled": 550800, "update_time_ms": 2.582, "num_steps_trained": 550800, "load_time_ms": 0.629, "default": {"kl": 0.013391264714300632, "cur_lr": 4.999999873689376e-05, "entropy": 8.823755264282227, "total_loss": 14.624773025512695, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13322040438652039, "vf_explained_var": 0.9883681535720825, "vf_loss": 14.744434356689453}, "grad_time_ms": 775.532}, "pid": 3934253, "time_total_s": 47524.207596063614, "episode_reward_mean": -152.56532788778122, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.05595446037233, "policy_reward_mean": {}, "episodes_total": 11016, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.83832716227093, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-48-41", "training_iteration": 459, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756518521, "episode_len_mean": 50.0, "timesteps_since_restore": 550800, "time_since_restore": 47524.207596063614, "time_this_iter_s": 82.45948624610901, "iterations_since_restore": 459}
+{"timesteps_total": 552000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 87682.644, "num_steps_sampled": 552000, "update_time_ms": 2.574, "num_steps_trained": 552000, "load_time_ms": 0.628, "default": {"kl": 0.013323888182640076, "cur_lr": 4.999999873689376e-05, "entropy": 8.730342864990234, "total_loss": 11.686019897460938, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12599381804466248, "vf_explained_var": 0.9910435080528259, "vf_loss": 11.798521995544434}, "grad_time_ms": 762.306}, "pid": 3934253, "time_total_s": 47605.679342508316, "episode_reward_mean": -152.69599782071492, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.15027913277754, "policy_reward_mean": {}, "episodes_total": 11040, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.83832716227093, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-50-03", "training_iteration": 460, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756518603, "episode_len_mean": 50.0, "timesteps_since_restore": 552000, "time_since_restore": 47605.679342508316, "time_this_iter_s": 81.47174644470215, "iterations_since_restore": 460}
+{"timesteps_total": 553200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 85658.694, "num_steps_sampled": 553200, "update_time_ms": 2.538, "num_steps_trained": 553200, "load_time_ms": 0.629, "default": {"kl": 0.013982264325022697, "cur_lr": 4.999999873689376e-05, "entropy": 9.205830574035645, "total_loss": 15.154325485229492, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14719703793525696, "vf_explained_var": 0.9879705905914307, "vf_loss": 15.28736400604248}, "grad_time_ms": 771.098}, "pid": 3934253, "time_total_s": 47679.35620856285, "episode_reward_mean": -152.83043235107942, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.15027913277754, "policy_reward_mean": {}, "episodes_total": 11064, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.83832716227093, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-51-17", "training_iteration": 461, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756518677, "episode_len_mean": 50.0, "timesteps_since_restore": 553200, "time_since_restore": 47679.35620856285, "time_this_iter_s": 73.67686605453491, "iterations_since_restore": 461}
+{"timesteps_total": 554400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 83275.878, "num_steps_sampled": 554400, "update_time_ms": 2.468, "num_steps_trained": 554400, "load_time_ms": 0.626, "default": {"kl": 0.012878802604973316, "cur_lr": 4.999999873689376e-05, "entropy": 8.50555419921875, "total_loss": 23.786239624023438, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10327385365962982, "vf_explained_var": 0.9826846718788147, "vf_loss": 23.876474380493164}, "grad_time_ms": 775.122}, "pid": 3934253, "time_total_s": 47759.24594569206, "episode_reward_mean": -152.7505983426883, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.15027913277754, "policy_reward_mean": {}, "episodes_total": 11088, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.76403805622115, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-52-37", "training_iteration": 462, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756518757, "episode_len_mean": 50.0, "timesteps_since_restore": 554400, "time_since_restore": 47759.24594569206, "time_this_iter_s": 79.88973712921143, "iterations_since_restore": 462}
+{"timesteps_total": 555600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 82794.033, "num_steps_sampled": 555600, "update_time_ms": 2.455, "num_steps_trained": 555600, "load_time_ms": 0.617, "default": {"kl": 0.015895912423729897, "cur_lr": 4.999999873689376e-05, "entropy": 8.813871383666992, "total_loss": 14.076557159423828, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13516120612621307, "vf_explained_var": 0.9894052147865295, "vf_loss": 14.195623397827148}, "grad_time_ms": 782.395}, "pid": 3934253, "time_total_s": 47850.721262931824, "episode_reward_mean": -152.2892577230175, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.15027913277754, "policy_reward_mean": {}, "episodes_total": 11112, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.34770473592064, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-54-08", "training_iteration": 463, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756518848, "episode_len_mean": 50.0, "timesteps_since_restore": 555600, "time_since_restore": 47850.721262931824, "time_this_iter_s": 91.47531723976135, "iterations_since_restore": 463}
+{"timesteps_total": 556800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 82909.562, "num_steps_sampled": 556800, "update_time_ms": 2.463, "num_steps_trained": 556800, "load_time_ms": 0.611, "default": {"kl": 0.011852155439555645, "cur_lr": 4.999999873689376e-05, "entropy": 8.566987991333008, "total_loss": 18.03278350830078, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12470168620347977, "vf_explained_var": 0.9847335815429688, "vf_loss": 18.14548683166504}, "grad_time_ms": 789.835}, "pid": 3934253, "time_total_s": 47940.77296257019, "episode_reward_mean": -152.30313201018302, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.06550295241124, "policy_reward_mean": {}, "episodes_total": 11136, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.34770473592064, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-55-38", "training_iteration": 464, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756518938, "episode_len_mean": 50.0, "timesteps_since_restore": 556800, "time_since_restore": 47940.77296257019, "time_this_iter_s": 90.0516996383667, "iterations_since_restore": 464}
+{"timesteps_total": 558000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 84216.927, "num_steps_sampled": 558000, "update_time_ms": 2.396, "num_steps_trained": 558000, "load_time_ms": 0.608, "default": {"kl": 0.012260083109140396, "cur_lr": 4.999999873689376e-05, "entropy": 8.692615509033203, "total_loss": 18.573740005493164, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11373353004455566, "vf_explained_var": 0.9856255650520325, "vf_loss": 18.675060272216797}, "grad_time_ms": 791.581}, "pid": 3934253, "time_total_s": 48041.7510638237, "episode_reward_mean": -152.37701671887567, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.06550295241124, "policy_reward_mean": {}, "episodes_total": 11160, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.3475296198611, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-57-19", "training_iteration": 465, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756519039, "episode_len_mean": 50.0, "timesteps_since_restore": 558000, "time_since_restore": 48041.7510638237, "time_this_iter_s": 100.97810125350952, "iterations_since_restore": 465}
+{"timesteps_total": 559200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 86656.837, "num_steps_sampled": 559200, "update_time_ms": 2.46, "num_steps_trained": 559200, "load_time_ms": 0.605, "default": {"kl": 0.012025618925690651, "cur_lr": 4.999999873689376e-05, "entropy": 8.53043270111084, "total_loss": 22.949411392211914, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.09604374319314957, "vf_explained_var": 0.9813482761383057, "vf_loss": 23.03327751159668}, "grad_time_ms": 780.629}, "pid": 3934253, "time_total_s": 48147.51311826706, "episode_reward_mean": -152.1334569143758, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.1671981814466, "policy_reward_mean": {}, "episodes_total": 11184, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.3475296198611, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_03-59-05", "training_iteration": 466, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756519145, "episode_len_mean": 50.0, "timesteps_since_restore": 559200, "time_since_restore": 48147.51311826706, "time_this_iter_s": 105.76205444335938, "iterations_since_restore": 466}
+{"timesteps_total": 560400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 84268.461, "num_steps_sampled": 560400, "update_time_ms": 2.471, "num_steps_trained": 560400, "load_time_ms": 0.596, "default": {"kl": 0.013515968807041645, "cur_lr": 4.999999873689376e-05, "entropy": 8.63956069946289, "total_loss": 35.26797103881836, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1302550584077835, "vf_explained_var": 0.9756333827972412, "vf_loss": 35.38453674316406}, "grad_time_ms": 772.352}, "pid": 3934253, "time_total_s": 48207.5612487793, "episode_reward_mean": -152.34913469895858, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.1671981814466, "policy_reward_mean": {}, "episodes_total": 11208, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.3475296198611, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-00-05", "training_iteration": 467, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756519205, "episode_len_mean": 50.0, "timesteps_since_restore": 560400, "time_since_restore": 48207.5612487793, "time_this_iter_s": 60.04813051223755, "iterations_since_restore": 467}
+{"timesteps_total": 561600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 86247.813, "num_steps_sampled": 561600, "update_time_ms": 2.396, "num_steps_trained": 561600, "load_time_ms": 0.616, "default": {"kl": 0.01517908088862896, "cur_lr": 4.999999873689376e-05, "entropy": 8.516483306884766, "total_loss": 25.675064086914062, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13278795778751373, "vf_explained_var": 0.9793300628662109, "vf_loss": 25.79248046875}, "grad_time_ms": 735.951}, "pid": 3934253, "time_total_s": 48311.67094898224, "episode_reward_mean": -152.33652022821164, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -171.78358159200687, "policy_reward_mean": {}, "episodes_total": 11232, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.3475296198611, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-01-49", "training_iteration": 468, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756519309, "episode_len_mean": 50.0, "timesteps_since_restore": 561600, "time_since_restore": 48311.67094898224, "time_this_iter_s": 104.1097002029419, "iterations_since_restore": 468}
+{"timesteps_total": 562800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 85308.638, "num_steps_sampled": 562800, "update_time_ms": 2.412, "num_steps_trained": 562800, "load_time_ms": 0.631, "default": {"kl": 0.014354195445775986, "cur_lr": 4.999999873689376e-05, "entropy": 8.716632843017578, "total_loss": 18.791744232177734, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13407041132450104, "vf_explained_var": 0.9842908382415771, "vf_loss": 18.911279678344727}, "grad_time_ms": 716.768}, "pid": 3934253, "time_total_s": 48384.54709339142, "episode_reward_mean": -152.1603091373918, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -171.78358159200687, "policy_reward_mean": {}, "episodes_total": 11256, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.66110503693395, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-03-02", "training_iteration": 469, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756519382, "episode_len_mean": 50.0, "timesteps_since_restore": 562800, "time_since_restore": 48384.54709339142, "time_this_iter_s": 72.87614440917969, "iterations_since_restore": 469}
+{"timesteps_total": 564000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 85557.386, "num_steps_sampled": 564000, "update_time_ms": 2.487, "num_steps_trained": 564000, "load_time_ms": 0.629, "default": {"kl": 0.012945041991770267, "cur_lr": 4.999999873689376e-05, "entropy": 8.680624008178711, "total_loss": 14.99290657043457, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1172991693019867, "vf_explained_var": 0.9875587821006775, "vf_loss": 15.09709644317627}, "grad_time_ms": 727.509}, "pid": 3934253, "time_total_s": 48468.61529612541, "episode_reward_mean": -152.16666849056782, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -171.78358159200687, "policy_reward_mean": {}, "episodes_total": 11280, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.66110503693395, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-04-26", "training_iteration": 470, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756519466, "episode_len_mean": 50.0, "timesteps_since_restore": 564000, "time_since_restore": 48468.61529612541, "time_this_iter_s": 84.06820273399353, "iterations_since_restore": 470}
+{"timesteps_total": 565200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 84829.604, "num_steps_sampled": 565200, "update_time_ms": 2.513, "num_steps_trained": 565200, "load_time_ms": 0.628, "default": {"kl": 0.013851411640644073, "cur_lr": 4.999999873689376e-05, "entropy": 8.776378631591797, "total_loss": 18.787683486938477, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12572117149829865, "vf_explained_var": 0.9839603900909424, "vf_loss": 18.89937973022461}, "grad_time_ms": 720.516}, "pid": 3934253, "time_total_s": 48534.94466614723, "episode_reward_mean": -151.8701079268161, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -171.78358159200687, "policy_reward_mean": {}, "episodes_total": 11304, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -140.12124004568955, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-05-32", "training_iteration": 471, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756519532, "episode_len_mean": 50.0, "timesteps_since_restore": 565200, "time_since_restore": 48534.94466614723, "time_this_iter_s": 66.32937002182007, "iterations_since_restore": 471}
+{"timesteps_total": 566400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 87558.517, "num_steps_sampled": 566400, "update_time_ms": 2.515, "num_steps_trained": 566400, "load_time_ms": 0.633, "default": {"kl": 0.014027898199856281, "cur_lr": 4.999999873689376e-05, "entropy": 8.957261085510254, "total_loss": 13.951068878173828, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12973517179489136, "vf_explained_var": 0.9884995222091675, "vf_loss": 14.066600799560547}, "grad_time_ms": 712.553}, "pid": 3934253, "time_total_s": 48642.04382133484, "episode_reward_mean": -151.58204971842872, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -163.73309523071484, "policy_reward_mean": {}, "episodes_total": 11328, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -140.12124004568955, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-07-20", "training_iteration": 472, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756519640, "episode_len_mean": 50.0, "timesteps_since_restore": 566400, "time_since_restore": 48642.04382133484, "time_this_iter_s": 107.09915518760681, "iterations_since_restore": 472}
+{"timesteps_total": 567600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 89974.294, "num_steps_sampled": 567600, "update_time_ms": 2.496, "num_steps_trained": 567600, "load_time_ms": 0.64, "default": {"kl": 0.013087683357298374, "cur_lr": 4.999999873689376e-05, "entropy": 8.811269760131836, "total_loss": 18.32082176208496, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12677785754203796, "vf_explained_var": 0.9860605597496033, "vf_loss": 18.43434715270996}, "grad_time_ms": 711.183}, "pid": 3934253, "time_total_s": 48757.663786411285, "episode_reward_mean": -151.90309347607905, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -172.55473715921238, "policy_reward_mean": {}, "episodes_total": 11352, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -140.12124004568955, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-09-15", "training_iteration": 473, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756519755, "episode_len_mean": 50.0, "timesteps_since_restore": 567600, "time_since_restore": 48757.663786411285, "time_this_iter_s": 115.61996507644653, "iterations_since_restore": 473}
+{"timesteps_total": 568800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 88965.22, "num_steps_sampled": 568800, "update_time_ms": 2.509, "num_steps_trained": 568800, "load_time_ms": 0.644, "default": {"kl": 0.012866493314504623, "cur_lr": 4.999999873689376e-05, "entropy": 8.510658264160156, "total_loss": 26.638233184814453, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12175793200731277, "vf_explained_var": 0.9810941219329834, "vf_loss": 26.746965408325195}, "grad_time_ms": 709.255}, "pid": 3934253, "time_total_s": 48837.60676407814, "episode_reward_mean": -152.0306529598313, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -172.55473715921238, "policy_reward_mean": {}, "episodes_total": 11376, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -140.12124004568955, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-10-35", "training_iteration": 474, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756519835, "episode_len_mean": 50.0, "timesteps_since_restore": 568800, "time_since_restore": 48837.60676407814, "time_this_iter_s": 79.94297766685486, "iterations_since_restore": 474}
+{"timesteps_total": 570000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 90873.243, "num_steps_sampled": 570000, "update_time_ms": 2.543, "num_steps_trained": 570000, "load_time_ms": 0.644, "default": {"kl": 0.014776766300201416, "cur_lr": 4.999999873689376e-05, "entropy": 8.7134370803833, "total_loss": 16.30389976501465, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12294605374336243, "vf_explained_var": 0.98764967918396, "vf_loss": 16.411884307861328}, "grad_time_ms": 703.627}, "pid": 3934253, "time_total_s": 48957.60911512375, "episode_reward_mean": -152.09120920384555, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -172.55473715921238, "policy_reward_mean": {}, "episodes_total": 11400, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.91291809163678, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-12-35", "training_iteration": 475, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756519955, "episode_len_mean": 50.0, "timesteps_since_restore": 570000, "time_since_restore": 48957.60911512375, "time_this_iter_s": 120.00235104560852, "iterations_since_restore": 475}
+{"timesteps_total": 571200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 90429.236, "num_steps_sampled": 571200, "update_time_ms": 2.448, "num_steps_trained": 571200, "load_time_ms": 0.642, "default": {"kl": 0.01419066358357668, "cur_lr": 4.999999873689376e-05, "entropy": 8.54990005493164, "total_loss": 25.738264083862305, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1428092122077942, "vf_explained_var": 0.9800757169723511, "vf_loss": 25.866703033447266}, "grad_time_ms": 706.412}, "pid": 3934253, "time_total_s": 49058.956107616425, "episode_reward_mean": -152.22941988879649, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -172.55473715921238, "policy_reward_mean": {}, "episodes_total": 11424, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -136.4136753827509, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-14-17", "training_iteration": 476, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756520057, "episode_len_mean": 50.0, "timesteps_since_restore": 571200, "time_since_restore": 49058.956107616425, "time_this_iter_s": 101.34699249267578, "iterations_since_restore": 476}
+{"timesteps_total": 572400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93479.099, "num_steps_sampled": 572400, "update_time_ms": 2.507, "num_steps_trained": 572400, "load_time_ms": 0.642, "default": {"kl": 0.013474556617438793, "cur_lr": 4.999999873689376e-05, "entropy": 8.342805862426758, "total_loss": 14.449737548828125, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11618823558092117, "vf_explained_var": 0.9880461096763611, "vf_loss": 14.55228328704834}, "grad_time_ms": 705.134}, "pid": 3934253, "time_total_s": 49149.48967766762, "episode_reward_mean": -151.62238611297823, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.26034009197124, "policy_reward_mean": {}, "episodes_total": 11448, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -136.4136753827509, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-15-47", "training_iteration": 477, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756520147, "episode_len_mean": 50.0, "timesteps_since_restore": 572400, "time_since_restore": 49149.48967766762, "time_this_iter_s": 90.53357005119324, "iterations_since_restore": 477}
+{"timesteps_total": 573600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93793.331, "num_steps_sampled": 573600, "update_time_ms": 2.49, "num_steps_trained": 573600, "load_time_ms": 0.626, "default": {"kl": 0.01338463556021452, "cur_lr": 4.999999873689376e-05, "entropy": 8.503240585327148, "total_loss": 9.148031234741211, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13191653788089752, "vf_explained_var": 0.9921321868896484, "vf_loss": 9.266396522521973}, "grad_time_ms": 741.434}, "pid": 3934253, "time_total_s": 49257.10363698006, "episode_reward_mean": -151.4530620575066, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.26034009197124, "policy_reward_mean": {}, "episodes_total": 11472, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -135.57837804089226, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-17-35", "training_iteration": 478, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756520255, "episode_len_mean": 50.0, "timesteps_since_restore": 573600, "time_since_restore": 49257.10363698006, "time_this_iter_s": 107.61395931243896, "iterations_since_restore": 478}
+{"timesteps_total": 574800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 97189.048, "num_steps_sampled": 574800, "update_time_ms": 2.439, "num_steps_trained": 574800, "load_time_ms": 0.615, "default": {"kl": 0.011861172504723072, "cur_lr": 4.999999873689376e-05, "entropy": 8.364619255065918, "total_loss": 19.182300567626953, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10482161492109299, "vf_explained_var": 0.9860363006591797, "vf_loss": 19.27511215209961}, "grad_time_ms": 755.483}, "pid": 3934253, "time_total_s": 49364.076297044754, "episode_reward_mean": -151.58091026932988, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.55116997370476, "policy_reward_mean": {}, "episodes_total": 11496, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -135.57837804089226, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-19-22", "training_iteration": 479, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756520362, "episode_len_mean": 50.0, "timesteps_since_restore": 574800, "time_since_restore": 49364.076297044754, "time_this_iter_s": 106.97266006469727, "iterations_since_restore": 479}
+{"timesteps_total": 576000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 98683.422, "num_steps_sampled": 576000, "update_time_ms": 2.427, "num_steps_trained": 576000, "load_time_ms": 0.622, "default": {"kl": 0.01613686792552471, "cur_lr": 4.999999873689376e-05, "entropy": 8.356700897216797, "total_loss": 24.950077056884766, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12751537561416626, "vf_explained_var": 0.9812971949577332, "vf_loss": 25.06125259399414}, "grad_time_ms": 730.311}, "pid": 3934253, "time_total_s": 49462.835492134094, "episode_reward_mean": -151.87481690855805, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.55116997370476, "policy_reward_mean": {}, "episodes_total": 11520, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -135.57837804089226, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-21-00", "training_iteration": 480, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756520460, "episode_len_mean": 50.0, "timesteps_since_restore": 576000, "time_since_restore": 49462.835492134094, "time_this_iter_s": 98.75919508934021, "iterations_since_restore": 480}
+{"timesteps_total": 577200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 100784.428, "num_steps_sampled": 577200, "update_time_ms": 2.385, "num_steps_trained": 577200, "load_time_ms": 0.616, "default": {"kl": 0.012163571082055569, "cur_lr": 4.999999873689376e-05, "entropy": 8.392812728881836, "total_loss": 25.000938415527344, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12458840012550354, "vf_explained_var": 0.9825544357299805, "vf_loss": 25.11321258544922}, "grad_time_ms": 737.015}, "pid": 3934253, "time_total_s": 49550.242958545685, "episode_reward_mean": -151.85538016827485, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.55116997370476, "policy_reward_mean": {}, "episodes_total": 11544, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -135.57837804089226, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-22-28", "training_iteration": 481, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756520548, "episode_len_mean": 50.0, "timesteps_since_restore": 577200, "time_since_restore": 49550.242958545685, "time_this_iter_s": 87.40746641159058, "iterations_since_restore": 481}
+{"timesteps_total": 578400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 100695.819, "num_steps_sampled": 578400, "update_time_ms": 2.41, "num_steps_trained": 578400, "load_time_ms": 0.609, "default": {"kl": 0.013876695185899734, "cur_lr": 4.999999873689376e-05, "entropy": 8.447154998779297, "total_loss": 19.025917053222656, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12835945188999176, "vf_explained_var": 0.9851945042610168, "vf_loss": 19.14022445678711}, "grad_time_ms": 737.976}, "pid": 3934253, "time_total_s": 49656.465804338455, "episode_reward_mean": -152.47601961931696, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -168.89862092308448, "policy_reward_mean": {}, "episodes_total": 11568, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -145.96097054937832, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-24-14", "training_iteration": 482, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756520654, "episode_len_mean": 50.0, "timesteps_since_restore": 578400, "time_since_restore": 49656.465804338455, "time_this_iter_s": 106.22284579277039, "iterations_since_restore": 482}
+{"timesteps_total": 579600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 98965.575, "num_steps_sampled": 579600, "update_time_ms": 2.424, "num_steps_trained": 579600, "load_time_ms": 0.602, "default": {"kl": 0.012843552976846695, "cur_lr": 4.999999873689376e-05, "entropy": 8.421581268310547, "total_loss": 28.655893325805664, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1217130795121193, "vf_explained_var": 0.981257438659668, "vf_loss": 28.764604568481445}, "grad_time_ms": 737.339}, "pid": 3934253, "time_total_s": 49754.775631427765, "episode_reward_mean": -152.4899519121384, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -168.89862092308448, "policy_reward_mean": {}, "episodes_total": 11592, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -145.96097054937832, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-25-53", "training_iteration": 483, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756520753, "episode_len_mean": 50.0, "timesteps_since_restore": 579600, "time_since_restore": 49754.775631427765, "time_this_iter_s": 98.30982708930969, "iterations_since_restore": 483}
+{"timesteps_total": 580800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 99783.682, "num_steps_sampled": 580800, "update_time_ms": 2.383, "num_steps_trained": 580800, "load_time_ms": 0.601, "default": {"kl": 0.01166777778416872, "cur_lr": 4.999999873689376e-05, "entropy": 8.177492141723633, "total_loss": 34.3708610534668, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11253535747528076, "vf_explained_var": 0.9757702350616455, "vf_loss": 34.47157669067383}, "grad_time_ms": 733.529}, "pid": 3934253, "time_total_s": 49842.862073898315, "episode_reward_mean": -152.56202090243394, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -168.89862092308448, "policy_reward_mean": {}, "episodes_total": 11616, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.9760204444557, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-27-21", "training_iteration": 484, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756520841, "episode_len_mean": 50.0, "timesteps_since_restore": 580800, "time_since_restore": 49842.862073898315, "time_this_iter_s": 88.08644247055054, "iterations_since_restore": 484}
+{"timesteps_total": 582000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 98990.706, "num_steps_sampled": 582000, "update_time_ms": 2.384, "num_steps_trained": 582000, "load_time_ms": 0.606, "default": {"kl": 0.013997341506183147, "cur_lr": 4.999999873689376e-05, "entropy": 8.29512882232666, "total_loss": 22.684200286865234, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12494519352912903, "vf_explained_var": 0.9818886518478394, "vf_loss": 22.79497528076172}, "grad_time_ms": 736.962}, "pid": 3934253, "time_total_s": 49954.96886229515, "episode_reward_mean": -152.57537375459523, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -168.89862092308448, "policy_reward_mean": {}, "episodes_total": 11640, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.9760204444557, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-29-13", "training_iteration": 485, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756520953, "episode_len_mean": 50.0, "timesteps_since_restore": 582000, "time_since_restore": 49954.96886229515, "time_this_iter_s": 112.10678839683533, "iterations_since_restore": 485}
+{"timesteps_total": 583200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 96576.534, "num_steps_sampled": 583200, "update_time_ms": 2.437, "num_steps_trained": 583200, "load_time_ms": 0.607, "default": {"kl": 0.013433815911412239, "cur_lr": 4.999999873689376e-05, "entropy": 8.316619873046875, "total_loss": 12.3467378616333, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13029375672340393, "vf_explained_var": 0.9899523258209229, "vf_loss": 12.46342945098877}, "grad_time_ms": 731.844}, "pid": 3934253, "time_total_s": 50032.12375879288, "episode_reward_mean": -152.43522240052872, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.16920054641602, "policy_reward_mean": {}, "episodes_total": 11664, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.9760204444557, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-30-30", "training_iteration": 486, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756521030, "episode_len_mean": 50.0, "timesteps_since_restore": 583200, "time_since_restore": 50032.12375879288, "time_this_iter_s": 77.15489649772644, "iterations_since_restore": 486}
+{"timesteps_total": 584400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 97872.595, "num_steps_sampled": 584400, "update_time_ms": 2.376, "num_steps_trained": 584400, "load_time_ms": 0.613, "default": {"kl": 0.01336054690182209, "cur_lr": 4.999999873689376e-05, "entropy": 8.06357479095459, "total_loss": 14.853938102722168, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11478282511234283, "vf_explained_var": 0.9875580668449402, "vf_loss": 14.955193519592285}, "grad_time_ms": 744.769}, "pid": 3934253, "time_total_s": 50135.74773335457, "episode_reward_mean": -152.2744356165756, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.16920054641602, "policy_reward_mean": {}, "episodes_total": 11688, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.9760204444557, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-32-14", "training_iteration": 487, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756521134, "episode_len_mean": 50.0, "timesteps_since_restore": 584400, "time_since_restore": 50135.74773335457, "time_this_iter_s": 103.62397456169128, "iterations_since_restore": 487}
+{"timesteps_total": 585600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93514.198, "num_steps_sampled": 585600, "update_time_ms": 2.391, "num_steps_trained": 585600, "load_time_ms": 0.623, "default": {"kl": 0.012713328003883362, "cur_lr": 4.999999873689376e-05, "entropy": 8.406123161315918, "total_loss": 9.236263275146484, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13359849154949188, "vf_explained_var": 0.9921019673347473, "vf_loss": 9.356989860534668}, "grad_time_ms": 744.189}, "pid": 3934253, "time_total_s": 50199.77256655693, "episode_reward_mean": -151.7673940732708, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -163.00463867105913, "policy_reward_mean": {}, "episodes_total": 11712, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.86458163390066, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-33-18", "training_iteration": 488, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756521198, "episode_len_mean": 50.0, "timesteps_since_restore": 585600, "time_since_restore": 50199.77256655693, "time_this_iter_s": 64.02483320236206, "iterations_since_restore": 488}
+{"timesteps_total": 586800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 90967.011, "num_steps_sampled": 586800, "update_time_ms": 2.398, "num_steps_trained": 586800, "load_time_ms": 0.616, "default": {"kl": 0.01239168830215931, "cur_lr": 4.999999873689376e-05, "entropy": 8.362218856811523, "total_loss": 16.686716079711914, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13526107370853424, "vf_explained_var": 0.9881305694580078, "vf_loss": 16.809431076049805}, "grad_time_ms": 760.819}, "pid": 3934253, "time_total_s": 50281.43963265419, "episode_reward_mean": -151.5349348740325, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -162.58932785547924, "policy_reward_mean": {}, "episodes_total": 11736, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.86458163390066, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-34-39", "training_iteration": 489, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756521279, "episode_len_mean": 50.0, "timesteps_since_restore": 586800, "time_since_restore": 50281.43963265419, "time_this_iter_s": 81.66706609725952, "iterations_since_restore": 489}
+{"timesteps_total": 588000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92896.958, "num_steps_sampled": 588000, "update_time_ms": 2.35, "num_steps_trained": 588000, "load_time_ms": 0.613, "default": {"kl": 0.012361129745841026, "cur_lr": 4.999999873689376e-05, "entropy": 8.23472785949707, "total_loss": 17.43859100341797, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10937649011611938, "vf_explained_var": 0.9870246052742004, "vf_loss": 17.53545379638672}, "grad_time_ms": 782.39}, "pid": 3934253, "time_total_s": 50399.713121175766, "episode_reward_mean": -151.58134547998327, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.551374223564, "policy_reward_mean": {}, "episodes_total": 11760, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.86458163390066, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-36-38", "training_iteration": 490, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756521398, "episode_len_mean": 50.0, "timesteps_since_restore": 588000, "time_since_restore": 50399.713121175766, "time_this_iter_s": 118.27348852157593, "iterations_since_restore": 490}
+{"timesteps_total": 589200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92667.249, "num_steps_sampled": 589200, "update_time_ms": 2.376, "num_steps_trained": 589200, "load_time_ms": 0.62, "default": {"kl": 0.013915492221713066, "cur_lr": 4.999999873689376e-05, "entropy": 8.18729019165039, "total_loss": 21.70315170288086, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11174440383911133, "vf_explained_var": 0.9853160381317139, "vf_loss": 21.80080795288086}, "grad_time_ms": 755.628}, "pid": 3934253, "time_total_s": 50484.555617809296, "episode_reward_mean": -151.50221318432668, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.551374223564, "policy_reward_mean": {}, "episodes_total": 11784, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.86458163390066, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-38-02", "training_iteration": 491, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756521482, "episode_len_mean": 50.0, "timesteps_since_restore": 589200, "time_since_restore": 50484.555617809296, "time_this_iter_s": 84.84249663352966, "iterations_since_restore": 491}
+{"timesteps_total": 590400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92900.83, "num_steps_sampled": 590400, "update_time_ms": 2.378, "num_steps_trained": 590400, "load_time_ms": 0.625, "default": {"kl": 0.014017928391695023, "cur_lr": 4.999999873689376e-05, "entropy": 8.306546211242676, "total_loss": 17.22860336303711, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13493818044662476, "vf_explained_var": 0.9866151809692383, "vf_loss": 17.349348068237305}, "grad_time_ms": 753.323}, "pid": 3934253, "time_total_s": 50593.09105873108, "episode_reward_mean": -151.9797859047839, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.551374223564, "policy_reward_mean": {}, "episodes_total": 11808, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.82675790269593, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-39-51", "training_iteration": 492, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756521591, "episode_len_mean": 50.0, "timesteps_since_restore": 590400, "time_since_restore": 50593.09105873108, "time_this_iter_s": 108.53544092178345, "iterations_since_restore": 492}
+{"timesteps_total": 591600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 91710.711, "num_steps_sampled": 591600, "update_time_ms": 2.399, "num_steps_trained": 591600, "load_time_ms": 0.626, "default": {"kl": 0.014049972407519817, "cur_lr": 4.999999873689376e-05, "entropy": 8.360414505004883, "total_loss": 17.243886947631836, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13506677746772766, "vf_explained_var": 0.9864630103111267, "vf_loss": 17.364728927612305}, "grad_time_ms": 739.928}, "pid": 3934253, "time_total_s": 50679.36665248871, "episode_reward_mean": -152.28057967114154, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.551374223564, "policy_reward_mean": {}, "episodes_total": 11832, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.94920052597337, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-41-17", "training_iteration": 493, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756521677, "episode_len_mean": 50.0, "timesteps_since_restore": 591600, "time_since_restore": 50679.36665248871, "time_this_iter_s": 86.2755937576294, "iterations_since_restore": 493}
+{"timesteps_total": 592800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94009.661, "num_steps_sampled": 592800, "update_time_ms": 2.403, "num_steps_trained": 592800, "load_time_ms": 0.626, "default": {"kl": 0.012254327535629272, "cur_lr": 4.999999873689376e-05, "entropy": 8.261372566223145, "total_loss": 24.526485443115234, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11333407461643219, "vf_explained_var": 0.98112553358078, "vf_loss": 24.627412796020508}, "grad_time_ms": 745.345}, "pid": 3934253, "time_total_s": 50790.49547314644, "episode_reward_mean": -152.2664411603055, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.55358103574406, "policy_reward_mean": {}, "episodes_total": 11856, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.94920052597337, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-43-08", "training_iteration": 494, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756521788, "episode_len_mean": 50.0, "timesteps_since_restore": 592800, "time_since_restore": 50790.49547314644, "time_this_iter_s": 111.1288206577301, "iterations_since_restore": 494}
+{"timesteps_total": 594000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93719.151, "num_steps_sampled": 594000, "update_time_ms": 2.353, "num_steps_trained": 594000, "load_time_ms": 0.623, "default": {"kl": 0.010306322015821934, "cur_lr": 4.999999873689376e-05, "entropy": 8.569117546081543, "total_loss": 38.06904983520508, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12895271182060242, "vf_explained_var": 0.9726418256759644, "vf_loss": 38.18756866455078}, "grad_time_ms": 743.518}, "pid": 3934253, "time_total_s": 50899.678308963776, "episode_reward_mean": -152.4053046281917, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -179.23954249428897, "policy_reward_mean": {}, "episodes_total": 11880, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.94920052597337, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-44-58", "training_iteration": 495, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756521898, "episode_len_mean": 50.0, "timesteps_since_restore": 594000, "time_since_restore": 50899.678308963776, "time_this_iter_s": 109.18283581733704, "iterations_since_restore": 495}
+{"timesteps_total": 595200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95340.754, "num_steps_sampled": 595200, "update_time_ms": 2.369, "num_steps_trained": 595200, "load_time_ms": 0.622, "default": {"kl": 0.014301293529570103, "cur_lr": 4.999999873689376e-05, "entropy": 8.337230682373047, "total_loss": 23.737653732299805, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11855091154575348, "vf_explained_var": 0.9828669428825378, "vf_loss": 23.841726303100586}, "grad_time_ms": 753.814}, "pid": 3934253, "time_total_s": 50993.152535676956, "episode_reward_mean": -152.48441120424198, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -179.23954249428897, "policy_reward_mean": {}, "episodes_total": 11904, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.24911084280703, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-46-31", "training_iteration": 496, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756521991, "episode_len_mean": 50.0, "timesteps_since_restore": 595200, "time_since_restore": 50993.152535676956, "time_this_iter_s": 93.47422671318054, "iterations_since_restore": 496}
+{"timesteps_total": 596400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93168.107, "num_steps_sampled": 596400, "update_time_ms": 2.414, "num_steps_trained": 596400, "load_time_ms": 0.622, "default": {"kl": 0.011234988458454609, "cur_lr": 4.999999873689376e-05, "entropy": 8.054550170898438, "total_loss": 32.349212646484375, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10083127021789551, "vf_explained_var": 0.9761930108070374, "vf_loss": 32.43867111206055}, "grad_time_ms": 745.001}, "pid": 3934253, "time_total_s": 51074.96179127693, "episode_reward_mean": -152.2923890436924, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -179.23954249428897, "policy_reward_mean": {}, "episodes_total": 11928, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.24911084280703, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-47-53", "training_iteration": 497, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756522073, "episode_len_mean": 50.0, "timesteps_since_restore": 596400, "time_since_restore": 51074.96179127693, "time_this_iter_s": 81.80925559997559, "iterations_since_restore": 497}
+{"timesteps_total": 597600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 96494.6, "num_steps_sampled": 597600, "update_time_ms": 2.411, "num_steps_trained": 597600, "load_time_ms": 0.606, "default": {"kl": 0.013591241091489792, "cur_lr": 4.999999873689376e-05, "entropy": 8.476066589355469, "total_loss": 22.050931930541992, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13895396888256073, "vf_explained_var": 0.9850466847419739, "vf_loss": 22.176122665405273}, "grad_time_ms": 745.967}, "pid": 3934253, "time_total_s": 51172.260909318924, "episode_reward_mean": -152.56113232132276, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -179.23954249428897, "policy_reward_mean": {}, "episodes_total": 11952, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.24911084280703, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-49-30", "training_iteration": 498, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756522170, "episode_len_mean": 50.0, "timesteps_since_restore": 597600, "time_since_restore": 51172.260909318924, "time_this_iter_s": 97.29911804199219, "iterations_since_restore": 498}
+{"timesteps_total": 598800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 97191.043, "num_steps_sampled": 598800, "update_time_ms": 2.462, "num_steps_trained": 598800, "load_time_ms": 0.607, "default": {"kl": 0.01398612093180418, "cur_lr": 4.999999873689376e-05, "entropy": 8.27603530883789, "total_loss": 11.802041053771973, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1316160410642624, "vf_explained_var": 0.9910869598388672, "vf_loss": 11.919496536254883}, "grad_time_ms": 742.414}, "pid": 3934253, "time_total_s": 51260.85743522644, "episode_reward_mean": -152.5133723821843, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -179.23954249428897, "policy_reward_mean": {}, "episodes_total": 11976, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.76588621311848, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-50-59", "training_iteration": 499, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756522259, "episode_len_mean": 50.0, "timesteps_since_restore": 598800, "time_since_restore": 51260.85743522644, "time_this_iter_s": 88.59652590751648, "iterations_since_restore": 499}
+{"timesteps_total": 600000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95862.436, "num_steps_sampled": 600000, "update_time_ms": 2.474, "num_steps_trained": 600000, "load_time_ms": 0.618, "default": {"kl": 0.01472895685583353, "cur_lr": 4.999999873689376e-05, "entropy": 8.435802459716797, "total_loss": 18.317811965942383, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13494382798671722, "vf_explained_var": 0.987172544002533, "vf_loss": 18.437843322753906}, "grad_time_ms": 734.813}, "pid": 3934253, "time_total_s": 51365.77009224892, "episode_reward_mean": -152.01744841803412, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -168.45596695942382, "policy_reward_mean": {}, "episodes_total": 12000, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.76588621311848, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-52-44", "training_iteration": 500, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756522364, "episode_len_mean": 50.0, "timesteps_since_restore": 600000, "time_since_restore": 51365.77009224892, "time_this_iter_s": 104.9126570224762, "iterations_since_restore": 500}
+{"timesteps_total": 601200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95968.686, "num_steps_sampled": 601200, "update_time_ms": 2.619, "num_steps_trained": 601200, "load_time_ms": 0.609, "default": {"kl": 0.012464533559978008, "cur_lr": 4.999999873689376e-05, "entropy": 7.986090660095215, "total_loss": 15.645466804504395, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12583625316619873, "vf_explained_var": 0.9872433543205261, "vf_loss": 15.758684158325195}, "grad_time_ms": 736.879}, "pid": 3934253, "time_total_s": 51451.69588470459, "episode_reward_mean": -152.01037099071374, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -168.45596695942382, "policy_reward_mean": {}, "episodes_total": 12024, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.76588621311848, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-54-10", "training_iteration": 501, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756522450, "episode_len_mean": 50.0, "timesteps_since_restore": 601200, "time_since_restore": 51451.69588470459, "time_this_iter_s": 85.92579245567322, "iterations_since_restore": 501}
+{"timesteps_total": 602400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93102.896, "num_steps_sampled": 602400, "update_time_ms": 2.611, "num_steps_trained": 602400, "load_time_ms": 0.613, "default": {"kl": 0.012951488606631756, "cur_lr": 4.999999873689376e-05, "entropy": 8.401609420776367, "total_loss": 14.38691520690918, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11934472620487213, "vf_explained_var": 0.9900305867195129, "vf_loss": 14.493144989013672}, "grad_time_ms": 738.486}, "pid": 3934253, "time_total_s": 51531.589405059814, "episode_reward_mean": -151.48011399393036, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.17799719138918, "policy_reward_mean": {}, "episodes_total": 12048, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.76588621311848, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-55-30", "training_iteration": 502, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756522530, "episode_len_mean": 50.0, "timesteps_since_restore": 602400, "time_since_restore": 51531.589405059814, "time_this_iter_s": 79.89352035522461, "iterations_since_restore": 502}
+{"timesteps_total": 603600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92845.764, "num_steps_sampled": 603600, "update_time_ms": 2.592, "num_steps_trained": 603600, "load_time_ms": 0.611, "default": {"kl": 0.012631156481802464, "cur_lr": 4.999999873689376e-05, "entropy": 8.266646385192871, "total_loss": 20.765531539916992, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13049106299877167, "vf_explained_var": 0.9859540462493896, "vf_loss": 20.88323402404785}, "grad_time_ms": 749.962}, "pid": 3934253, "time_total_s": 51615.408281326294, "episode_reward_mean": -151.95040094615155, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.17799719138918, "policy_reward_mean": {}, "episodes_total": 12072, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.91358491840785, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-56-53", "training_iteration": 503, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756522613, "episode_len_mean": 50.0, "timesteps_since_restore": 603600, "time_since_restore": 51615.408281326294, "time_this_iter_s": 83.81887626647949, "iterations_since_restore": 503}
+{"timesteps_total": 604800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 91623.159, "num_steps_sampled": 604800, "update_time_ms": 2.684, "num_steps_trained": 604800, "load_time_ms": 0.615, "default": {"kl": 0.0131832305341959, "cur_lr": 4.999999873689376e-05, "entropy": 8.261452674865723, "total_loss": 21.291887283325195, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13001152873039246, "vf_explained_var": 0.9833104610443115, "vf_loss": 21.408550262451172}, "grad_time_ms": 748.329}, "pid": 3934253, "time_total_s": 51714.29527378082, "episode_reward_mean": -152.11592553945118, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -169.47437276213114, "policy_reward_mean": {}, "episodes_total": 12096, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.91358491840785, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_04-58-32", "training_iteration": 504, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756522712, "episode_len_mean": 50.0, "timesteps_since_restore": 604800, "time_since_restore": 51714.29527378082, "time_this_iter_s": 98.88699245452881, "iterations_since_restore": 504}
+{"timesteps_total": 606000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 91136.225, "num_steps_sampled": 606000, "update_time_ms": 2.73, "num_steps_trained": 606000, "load_time_ms": 0.622, "default": {"kl": 0.012735579162836075, "cur_lr": 4.999999873689376e-05, "entropy": 8.163020133972168, "total_loss": 32.51218795776367, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12349916994571686, "vf_explained_var": 0.9749259948730469, "vf_loss": 32.62278747558594}, "grad_time_ms": 744.669}, "pid": 3934253, "time_total_s": 51818.57286596298, "episode_reward_mean": -152.21681742415493, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -174.96017940841094, "policy_reward_mean": {}, "episodes_total": 12120, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.91358491840785, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-00-17", "training_iteration": 505, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756522817, "episode_len_mean": 50.0, "timesteps_since_restore": 606000, "time_since_restore": 51818.57286596298, "time_this_iter_s": 104.27759218215942, "iterations_since_restore": 505}
+{"timesteps_total": 607200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 91654.218, "num_steps_sampled": 607200, "update_time_ms": 2.733, "num_steps_trained": 607200, "load_time_ms": 0.629, "default": {"kl": 0.010551582090556622, "cur_lr": 4.999999873689376e-05, "entropy": 8.147479057312012, "total_loss": 24.14088249206543, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1115213930606842, "vf_explained_var": 0.9835327863693237, "vf_loss": 24.241722106933594}, "grad_time_ms": 730.711}, "pid": 3934253, "time_total_s": 51917.086246967316, "episode_reward_mean": -152.5065032769895, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -174.96017940841094, "policy_reward_mean": {}, "episodes_total": 12144, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -145.65283452681913, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-01-55", "training_iteration": 506, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756522915, "episode_len_mean": 50.0, "timesteps_since_restore": 607200, "time_since_restore": 51917.086246967316, "time_this_iter_s": 98.5133810043335, "iterations_since_restore": 506}
+{"timesteps_total": 608400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92937.584, "num_steps_sampled": 608400, "update_time_ms": 2.651, "num_steps_trained": 608400, "load_time_ms": 0.627, "default": {"kl": 0.013679493218660355, "cur_lr": 4.999999873689376e-05, "entropy": 8.322196006774902, "total_loss": 17.807706832885742, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13080990314483643, "vf_explained_var": 0.989324152469635, "vf_loss": 17.924665451049805}, "grad_time_ms": 723.807}, "pid": 3934253, "time_total_s": 52011.65894627571, "episode_reward_mean": -152.62199344445517, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -174.96017940841094, "policy_reward_mean": {}, "episodes_total": 12168, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -145.65283452681913, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-03-30", "training_iteration": 507, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756523010, "episode_len_mean": 50.0, "timesteps_since_restore": 608400, "time_since_restore": 52011.65894627571, "time_this_iter_s": 94.57269930839539, "iterations_since_restore": 507}
+{"timesteps_total": 609600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94056.282, "num_steps_sampled": 609600, "update_time_ms": 2.661, "num_steps_trained": 609600, "load_time_ms": 0.636, "default": {"kl": 0.01215057447552681, "cur_lr": 4.999999873689376e-05, "entropy": 8.190593719482422, "total_loss": 19.476789474487305, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1280270367860794, "vf_explained_var": 0.9847090840339661, "vf_loss": 19.592514038085938}, "grad_time_ms": 700.216}, "pid": 3934253, "time_total_s": 52119.90881872177, "episode_reward_mean": -152.45247066530314, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -174.96017940841094, "policy_reward_mean": {}, "episodes_total": 12192, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -145.65283452681913, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-05-18", "training_iteration": 508, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756523118, "episode_len_mean": 50.0, "timesteps_since_restore": 609600, "time_since_restore": 52119.90881872177, "time_this_iter_s": 108.24987244606018, "iterations_since_restore": 508}
+{"timesteps_total": 610800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95930.862, "num_steps_sampled": 610800, "update_time_ms": 2.656, "num_steps_trained": 610800, "load_time_ms": 0.642, "default": {"kl": 0.012087873183190823, "cur_lr": 4.999999873689376e-05, "entropy": 8.145371437072754, "total_loss": 15.570717811584473, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10609038174152374, "vf_explained_var": 0.986748456954956, "vf_loss": 15.664569854736328}, "grad_time_ms": 692.213}, "pid": 3934253, "time_total_s": 52227.17141199112, "episode_reward_mean": -151.90335354787902, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.31865727551698, "policy_reward_mean": {}, "episodes_total": 12216, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -137.2146438832549, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-07-05", "training_iteration": 509, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756523225, "episode_len_mean": 50.0, "timesteps_since_restore": 610800, "time_since_restore": 52227.17141199112, "time_this_iter_s": 107.26259326934814, "iterations_since_restore": 509}
+{"timesteps_total": 612000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95583.025, "num_steps_sampled": 612000, "update_time_ms": 2.643, "num_steps_trained": 612000, "load_time_ms": 0.629, "default": {"kl": 0.013264824636280537, "cur_lr": 4.999999873689376e-05, "entropy": 8.246790885925293, "total_loss": 45.29621505737305, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12984015047550201, "vf_explained_var": 0.9686688184738159, "vf_loss": 45.41261672973633}, "grad_time_ms": 703.069}, "pid": 3934253, "time_total_s": 52328.713398218155, "episode_reward_mean": -151.84296176241773, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -176.07474622271582, "policy_reward_mean": {}, "episodes_total": 12240, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -137.2146438832549, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-08-47", "training_iteration": 510, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756523327, "episode_len_mean": 50.0, "timesteps_since_restore": 612000, "time_since_restore": 52328.713398218155, "time_this_iter_s": 101.54198622703552, "iterations_since_restore": 510}
+{"timesteps_total": 613200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 97924.306, "num_steps_sampled": 613200, "update_time_ms": 2.498, "num_steps_trained": 613200, "load_time_ms": 0.628, "default": {"kl": 0.01422956120222807, "cur_lr": 4.999999873689376e-05, "entropy": 8.146068572998047, "total_loss": 11.832422256469727, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13532468676567078, "vf_explained_var": 0.99064040184021, "vf_loss": 11.953340530395508}, "grad_time_ms": 719.987}, "pid": 3934253, "time_total_s": 52438.22039580345, "episode_reward_mean": -151.5841077330452, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -176.07474622271582, "policy_reward_mean": {}, "episodes_total": 12264, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -137.2146438832549, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-10-36", "training_iteration": 511, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756523436, "episode_len_mean": 50.0, "timesteps_since_restore": 613200, "time_since_restore": 52438.22039580345, "time_this_iter_s": 109.50699758529663, "iterations_since_restore": 511}
+{"timesteps_total": 614400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 102984.168, "num_steps_sampled": 614400, "update_time_ms": 2.622, "num_steps_trained": 614400, "load_time_ms": 0.65, "default": {"kl": 0.01368715986609459, "cur_lr": 4.999999873689376e-05, "entropy": 8.12667465209961, "total_loss": 10.103468894958496, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12915891408920288, "vf_explained_var": 0.9917065501213074, "vf_loss": 10.218769073486328}, "grad_time_ms": 721.669}, "pid": 3934253, "time_total_s": 52568.73124575615, "episode_reward_mean": -151.36754937198694, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -176.07474622271582, "policy_reward_mean": {}, "episodes_total": 12288, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -135.6766711022273, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-12-47", "training_iteration": 512, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756523567, "episode_len_mean": 50.0, "timesteps_since_restore": 614400, "time_since_restore": 52568.73124575615, "time_this_iter_s": 130.51084995269775, "iterations_since_restore": 512}
+{"timesteps_total": 615600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 102785.433, "num_steps_sampled": 615600, "update_time_ms": 2.633, "num_steps_trained": 615600, "load_time_ms": 0.653, "default": {"kl": 0.01425766758620739, "cur_lr": 4.999999873689376e-05, "entropy": 8.303292274475098, "total_loss": 15.37277889251709, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12664847075939178, "vf_explained_var": 0.9880255460739136, "vf_loss": 15.484992027282715}, "grad_time_ms": 707.702}, "pid": 3934253, "time_total_s": 52650.423523426056, "episode_reward_mean": -152.04099381919596, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -176.07474622271582, "policy_reward_mean": {}, "episodes_total": 12312, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -135.6766711022273, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-14-09", "training_iteration": 513, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756523649, "episode_len_mean": 50.0, "timesteps_since_restore": 615600, "time_since_restore": 52650.423523426056, "time_this_iter_s": 81.69227766990662, "iterations_since_restore": 513}
+{"timesteps_total": 616800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 100567.557, "num_steps_sampled": 616800, "update_time_ms": 2.548, "num_steps_trained": 616800, "load_time_ms": 0.652, "default": {"kl": 0.014286945573985577, "cur_lr": 4.999999873689376e-05, "entropy": 8.151420593261719, "total_loss": 13.630146026611328, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13689583539962769, "vf_explained_var": 0.9897435307502747, "vf_loss": 13.752577781677246}, "grad_time_ms": 716.56}, "pid": 3934253, "time_total_s": 52727.22015619278, "episode_reward_mean": -152.27760701819744, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -170.78339176081246, "policy_reward_mean": {}, "episodes_total": 12336, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -135.6766711022273, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-15-25", "training_iteration": 514, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756523725, "episode_len_mean": 50.0, "timesteps_since_restore": 616800, "time_since_restore": 52727.22015619278, "time_this_iter_s": 76.79663276672363, "iterations_since_restore": 514}
+{"timesteps_total": 618000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 99823.146, "num_steps_sampled": 618000, "update_time_ms": 2.531, "num_steps_trained": 618000, "load_time_ms": 0.676, "default": {"kl": 0.012816226109862328, "cur_lr": 4.999999873689376e-05, "entropy": 7.943474769592285, "total_loss": 11.330946922302246, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12894707918167114, "vf_explained_var": 0.991180956363678, "vf_loss": 11.446918487548828}, "grad_time_ms": 717.567}, "pid": 3934253, "time_total_s": 52824.06447529793, "episode_reward_mean": -152.1601072845309, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.464972589186, "policy_reward_mean": {}, "episodes_total": 12360, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -135.6766711022273, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-17-02", "training_iteration": 515, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756523822, "episode_len_mean": 50.0, "timesteps_since_restore": 618000, "time_since_restore": 52824.06447529793, "time_this_iter_s": 96.84431910514832, "iterations_since_restore": 515}
+{"timesteps_total": 619200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 96189.219, "num_steps_sampled": 619200, "update_time_ms": 2.517, "num_steps_trained": 619200, "load_time_ms": 0.669, "default": {"kl": 0.012791362591087818, "cur_lr": 4.999999873689376e-05, "entropy": 8.24141788482666, "total_loss": 26.494110107421875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10487866401672363, "vf_explained_var": 0.9813768863677979, "vf_loss": 26.58603858947754}, "grad_time_ms": 727.341}, "pid": 3934253, "time_total_s": 52886.338240385056, "episode_reward_mean": -152.80565974195426, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -188.88444817631853, "policy_reward_mean": {}, "episodes_total": 12384, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.63657027284142, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-18-05", "training_iteration": 516, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756523885, "episode_len_mean": 50.0, "timesteps_since_restore": 619200, "time_since_restore": 52886.338240385056, "time_this_iter_s": 62.273765087127686, "iterations_since_restore": 516}
+{"timesteps_total": 620400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 98201.82, "num_steps_sampled": 620400, "update_time_ms": 2.59, "num_steps_trained": 620400, "load_time_ms": 0.669, "default": {"kl": 0.014150070026516914, "cur_lr": 4.999999873689376e-05, "entropy": 8.084158897399902, "total_loss": 23.739412307739258, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11772307753562927, "vf_explained_var": 0.9816194176673889, "vf_loss": 23.84280776977539}, "grad_time_ms": 744.37}, "pid": 3934253, "time_total_s": 53001.20790696144, "episode_reward_mean": -152.8964755557505, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -188.88444817631853, "policy_reward_mean": {}, "episodes_total": 12408, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.63657027284142, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-19-59", "training_iteration": 517, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756523999, "episode_len_mean": 50.0, "timesteps_since_restore": 620400, "time_since_restore": 53001.20790696144, "time_this_iter_s": 114.8696665763855, "iterations_since_restore": 517}
+{"timesteps_total": 621600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 98627.304, "num_steps_sampled": 621600, "update_time_ms": 2.552, "num_steps_trained": 621600, "load_time_ms": 0.662, "default": {"kl": 0.013758014887571335, "cur_lr": 4.999999873689376e-05, "entropy": 8.227802276611328, "total_loss": 17.798114776611328, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13919270038604736, "vf_explained_var": 0.9861811399459839, "vf_loss": 17.923377990722656}, "grad_time_ms": 759.585}, "pid": 3934253, "time_total_s": 53113.86532020569, "episode_reward_mean": -152.3823699226433, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -188.88444817631853, "policy_reward_mean": {}, "episodes_total": 12432, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.97483859918682, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-21-52", "training_iteration": 518, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756524112, "episode_len_mean": 50.0, "timesteps_since_restore": 621600, "time_since_restore": 53113.86532020569, "time_this_iter_s": 112.65741324424744, "iterations_since_restore": 518}
+{"timesteps_total": 622800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 96507.54, "num_steps_sampled": 622800, "update_time_ms": 2.547, "num_steps_trained": 622800, "load_time_ms": 0.662, "default": {"kl": 0.014047209173440933, "cur_lr": 4.999999873689376e-05, "entropy": 8.386871337890625, "total_loss": 11.968228340148926, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12979204952716827, "vf_explained_var": 0.9908723831176758, "vf_loss": 12.083797454833984}, "grad_time_ms": 772.976}, "pid": 3934253, "time_total_s": 53200.06404042244, "episode_reward_mean": -152.4626276036031, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -188.88444817631853, "policy_reward_mean": {}, "episodes_total": 12456, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.3687594400822, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-23-18", "training_iteration": 519, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756524198, "episode_len_mean": 50.0, "timesteps_since_restore": 622800, "time_since_restore": 53200.06404042244, "time_this_iter_s": 86.1987202167511, "iterations_since_restore": 519}
+{"timesteps_total": 624000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 97227.837, "num_steps_sampled": 624000, "update_time_ms": 2.585, "num_steps_trained": 624000, "load_time_ms": 0.663, "default": {"kl": 0.012714684940874577, "cur_lr": 4.999999873689376e-05, "entropy": 7.912516117095947, "total_loss": 10.766222953796387, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1207198053598404, "vf_explained_var": 0.9909854531288147, "vf_loss": 10.874068260192871}, "grad_time_ms": 766.835}, "pid": 3934253, "time_total_s": 53308.747881650925, "episode_reward_mean": -151.79671619332342, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.87898398359434, "policy_reward_mean": {}, "episodes_total": 12480, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.7748335402267, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-25-07", "training_iteration": 520, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756524307, "episode_len_mean": 50.0, "timesteps_since_restore": 624000, "time_since_restore": 53308.747881650925, "time_this_iter_s": 108.68384122848511, "iterations_since_restore": 520}
+{"timesteps_total": 625200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94634.396, "num_steps_sampled": 625200, "update_time_ms": 2.608, "num_steps_trained": 625200, "load_time_ms": 0.668, "default": {"kl": 0.012906880117952824, "cur_lr": 4.999999873689376e-05, "entropy": 8.208747863769531, "total_loss": 11.932040214538574, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12314458936452866, "vf_explained_var": 0.990728497505188, "vf_loss": 12.042116165161133}, "grad_time_ms": 767.734}, "pid": 3934253, "time_total_s": 53392.32917332649, "episode_reward_mean": -151.6300232368595, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.87898398359434, "policy_reward_mean": {}, "episodes_total": 12504, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.7748335402267, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-26-31", "training_iteration": 521, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756524391, "episode_len_mean": 50.0, "timesteps_since_restore": 625200, "time_since_restore": 53392.32917332649, "time_this_iter_s": 83.58129167556763, "iterations_since_restore": 521}
+{"timesteps_total": 626400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92912.862, "num_steps_sampled": 626400, "update_time_ms": 2.461, "num_steps_trained": 626400, "load_time_ms": 0.64, "default": {"kl": 0.01441223919391632, "cur_lr": 4.999999873689376e-05, "entropy": 8.211857795715332, "total_loss": 14.67701530456543, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12073574960231781, "vf_explained_var": 0.9877294898033142, "vf_loss": 14.783159255981445}, "grad_time_ms": 766.158}, "pid": 3934253, "time_total_s": 53505.60668492317, "episode_reward_mean": -151.439543385995, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.87898398359434, "policy_reward_mean": {}, "episodes_total": 12528, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.45819028197874, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-28-24", "training_iteration": 522, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756524504, "episode_len_mean": 50.0, "timesteps_since_restore": 626400, "time_since_restore": 53505.60668492317, "time_this_iter_s": 113.27751159667969, "iterations_since_restore": 522}
+{"timesteps_total": 627600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93068.389, "num_steps_sampled": 627600, "update_time_ms": 2.534, "num_steps_trained": 627600, "load_time_ms": 0.644, "default": {"kl": 0.011968232691287994, "cur_lr": 4.999999873689376e-05, "entropy": 8.311710357666016, "total_loss": 15.748764038085938, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12205375730991364, "vf_explained_var": 0.9866352081298828, "vf_loss": 15.858699798583984}, "grad_time_ms": 786.6}, "pid": 3934253, "time_total_s": 53589.059653282166, "episode_reward_mean": -151.3334212564389, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.87898398359434, "policy_reward_mean": {}, "episodes_total": 12552, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.45819028197874, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-29-47", "training_iteration": 523, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756524587, "episode_len_mean": 50.0, "timesteps_since_restore": 627600, "time_since_restore": 53589.059653282166, "time_this_iter_s": 83.45296835899353, "iterations_since_restore": 523}
+{"timesteps_total": 628800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95102.69, "num_steps_sampled": 628800, "update_time_ms": 2.546, "num_steps_trained": 628800, "load_time_ms": 0.646, "default": {"kl": 0.011504167690873146, "cur_lr": 4.999999873689376e-05, "entropy": 8.257884979248047, "total_loss": 20.620460510253906, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11923030763864517, "vf_explained_var": 0.9844631552696228, "vf_loss": 20.72804069519043}, "grad_time_ms": 778.226}, "pid": 3934253, "time_total_s": 53686.11592555046, "episode_reward_mean": -151.36060793239966, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -162.56349252872974, "policy_reward_mean": {}, "episodes_total": 12576, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.45819028197874, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-31-25", "training_iteration": 524, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756524685, "episode_len_mean": 50.0, "timesteps_since_restore": 628800, "time_since_restore": 53686.11592555046, "time_this_iter_s": 97.05627226829529, "iterations_since_restore": 524}
+{"timesteps_total": 630000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95618.693, "num_steps_sampled": 630000, "update_time_ms": 2.56, "num_steps_trained": 630000, "load_time_ms": 0.616, "default": {"kl": 0.012765922583639622, "cur_lr": 4.999999873689376e-05, "entropy": 8.11794662475586, "total_loss": 11.24935531616211, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1303068846464157, "vf_explained_var": 0.9913408756256104, "vf_loss": 11.36673641204834}, "grad_time_ms": 785.336}, "pid": 3934253, "time_total_s": 53788.19004368782, "episode_reward_mean": -151.2742824012452, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -162.56349252872974, "policy_reward_mean": {}, "episodes_total": 12600, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.45819028197874, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-33-07", "training_iteration": 525, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756524787, "episode_len_mean": 50.0, "timesteps_since_restore": 630000, "time_since_restore": 53788.19004368782, "time_this_iter_s": 102.07411813735962, "iterations_since_restore": 525}
+{"timesteps_total": 631200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 98916.172, "num_steps_sampled": 631200, "update_time_ms": 2.524, "num_steps_trained": 631200, "load_time_ms": 0.615, "default": {"kl": 0.014033918268978596, "cur_lr": 4.999999873689376e-05, "entropy": 8.2867431640625, "total_loss": 21.977487564086914, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12659567594528198, "vf_explained_var": 0.9823175072669983, "vf_loss": 22.089872360229492}, "grad_time_ms": 773.845}, "pid": 3934253, "time_total_s": 53883.322149038315, "episode_reward_mean": -151.7377933753082, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.93235202604248, "policy_reward_mean": {}, "episodes_total": 12624, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -140.14347767908308, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-34-42", "training_iteration": 526, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756524882, "episode_len_mean": 50.0, "timesteps_since_restore": 631200, "time_since_restore": 53883.322149038315, "time_this_iter_s": 95.13210535049438, "iterations_since_restore": 526}
+{"timesteps_total": 632400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 99015.324, "num_steps_sampled": 632400, "update_time_ms": 2.479, "num_steps_trained": 632400, "load_time_ms": 0.616, "default": {"kl": 0.01316943857818842, "cur_lr": 4.999999873689376e-05, "entropy": 8.11813735961914, "total_loss": 15.774693489074707, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11189457774162292, "vf_explained_var": 0.988402783870697, "vf_loss": 15.87325382232666}, "grad_time_ms": 772.287}, "pid": 3934253, "time_total_s": 53999.16732788086, "episode_reward_mean": -151.79679673759537, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.93235202604248, "policy_reward_mean": {}, "episodes_total": 12648, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.85611414435792, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-36-38", "training_iteration": 527, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756524998, "episode_len_mean": 50.0, "timesteps_since_restore": 632400, "time_since_restore": 53999.16732788086, "time_this_iter_s": 115.84517884254456, "iterations_since_restore": 527}
+{"timesteps_total": 633600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 97399.361, "num_steps_sampled": 633600, "update_time_ms": 2.551, "num_steps_trained": 633600, "load_time_ms": 0.618, "default": {"kl": 0.013452763669192791, "cur_lr": 4.999999873689376e-05, "entropy": 7.95552921295166, "total_loss": 11.074514389038086, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12565070390701294, "vf_explained_var": 0.9907246828079224, "vf_loss": 11.186546325683594}, "grad_time_ms": 779.538}, "pid": 3934253, "time_total_s": 54095.73775577545, "episode_reward_mean": -151.85492234567778, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.93235202604248, "policy_reward_mean": {}, "episodes_total": 12672, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.85611414435792, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-38-14", "training_iteration": 528, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756525094, "episode_len_mean": 50.0, "timesteps_since_restore": 633600, "time_since_restore": 54095.73775577545, "time_this_iter_s": 96.57042789459229, "iterations_since_restore": 528}
+{"timesteps_total": 634800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 98868.009, "num_steps_sampled": 634800, "update_time_ms": 2.695, "num_steps_trained": 634800, "load_time_ms": 0.614, "default": {"kl": 0.012571911327540874, "cur_lr": 4.999999873689376e-05, "entropy": 7.765262126922607, "total_loss": 20.411996841430664, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11248551309108734, "vf_explained_var": 0.984770655632019, "vf_loss": 20.511754989624023}, "grad_time_ms": 771.55}, "pid": 3934253, "time_total_s": 54196.54490971565, "episode_reward_mean": -151.80911372745547, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -169.4209576894632, "policy_reward_mean": {}, "episodes_total": 12696, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.84469927279616, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-39-55", "training_iteration": 529, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756525195, "episode_len_mean": 50.0, "timesteps_since_restore": 634800, "time_since_restore": 54196.54490971565, "time_this_iter_s": 100.8071539402008, "iterations_since_restore": 529}
+{"timesteps_total": 636000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 97273.899, "num_steps_sampled": 636000, "update_time_ms": 2.68, "num_steps_trained": 636000, "load_time_ms": 0.612, "default": {"kl": 0.013294359669089317, "cur_lr": 4.999999873689376e-05, "entropy": 8.232682228088379, "total_loss": 13.754680633544922, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12237784266471863, "vf_explained_var": 0.9897999167442322, "vf_loss": 13.86359977722168}, "grad_time_ms": 774.521}, "pid": 3934253, "time_total_s": 54289.31747460365, "episode_reward_mean": -151.66786411049014, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -169.4209576894632, "policy_reward_mean": {}, "episodes_total": 12720, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.84469927279616, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-41-28", "training_iteration": 530, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756525288, "episode_len_mean": 50.0, "timesteps_since_restore": 636000, "time_since_restore": 54289.31747460365, "time_this_iter_s": 92.77256488800049, "iterations_since_restore": 530}
+{"timesteps_total": 637200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 96229.918, "num_steps_sampled": 637200, "update_time_ms": 2.633, "num_steps_trained": 637200, "load_time_ms": 0.613, "default": {"kl": 0.01316928118467331, "cur_lr": 4.999999873689376e-05, "entropy": 8.252899169921875, "total_loss": 14.948100090026855, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12974220514297485, "vf_explained_var": 0.9902970790863037, "vf_loss": 15.064509391784668}, "grad_time_ms": 772.885}, "pid": 3934253, "time_total_s": 54362.44271707535, "episode_reward_mean": -151.8159965155838, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -169.4209576894632, "policy_reward_mean": {}, "episodes_total": 12744, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.84469927279616, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-42-41", "training_iteration": 531, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756525361, "episode_len_mean": 50.0, "timesteps_since_restore": 637200, "time_since_restore": 54362.44271707535, "time_this_iter_s": 73.12524247169495, "iterations_since_restore": 531}
+{"timesteps_total": 638400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 96145.331, "num_steps_sampled": 638400, "update_time_ms": 2.666, "num_steps_trained": 638400, "load_time_ms": 0.62, "default": {"kl": 0.012331483885645866, "cur_lr": 4.999999873689376e-05, "entropy": 8.109546661376953, "total_loss": 11.662945747375488, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12200065702199936, "vf_explained_var": 0.9907290935516357, "vf_loss": 11.772459983825684}, "grad_time_ms": 773.306}, "pid": 3934253, "time_total_s": 54474.878903627396, "episode_reward_mean": -151.90677020646783, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -169.4209576894632, "policy_reward_mean": {}, "episodes_total": 12768, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.84469927279616, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-44-33", "training_iteration": 532, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756525473, "episode_len_mean": 50.0, "timesteps_since_restore": 638400, "time_since_restore": 54474.878903627396, "time_this_iter_s": 112.43618655204773, "iterations_since_restore": 532}
+{"timesteps_total": 639600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95973.23, "num_steps_sampled": 639600, "update_time_ms": 2.589, "num_steps_trained": 639600, "load_time_ms": 0.618, "default": {"kl": 0.011274803429841995, "cur_lr": 4.999999873689376e-05, "entropy": 7.791914939880371, "total_loss": 13.35634708404541, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10990992188453674, "vf_explained_var": 0.9885459542274475, "vf_loss": 13.454841613769531}, "grad_time_ms": 769.58}, "pid": 3934253, "time_total_s": 54556.572207927704, "episode_reward_mean": -152.0021072586479, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -163.8275029739961, "policy_reward_mean": {}, "episodes_total": 12792, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.7869101792552, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-45-55", "training_iteration": 533, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756525555, "episode_len_mean": 50.0, "timesteps_since_restore": 639600, "time_since_restore": 54556.572207927704, "time_this_iter_s": 81.69330430030823, "iterations_since_restore": 533}
+{"timesteps_total": 640800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95133.223, "num_steps_sampled": 640800, "update_time_ms": 2.603, "num_steps_trained": 640800, "load_time_ms": 0.618, "default": {"kl": 0.011796173639595509, "cur_lr": 4.999999873689376e-05, "entropy": 7.725955486297607, "total_loss": 17.79370880126953, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11688640713691711, "vf_explained_var": 0.9858031868934631, "vf_loss": 17.898653030395508}, "grad_time_ms": 775.256}, "pid": 3934253, "time_total_s": 54645.28475642204, "episode_reward_mean": -151.91061175978035, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -163.8275029739961, "policy_reward_mean": {}, "episodes_total": 12816, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.7869101792552, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-47-24", "training_iteration": 534, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756525644, "episode_len_mean": 50.0, "timesteps_since_restore": 640800, "time_since_restore": 54645.28475642204, "time_this_iter_s": 88.71254849433899, "iterations_since_restore": 534}
+{"timesteps_total": 642000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95814.997, "num_steps_sampled": 642000, "update_time_ms": 2.577, "num_steps_trained": 642000, "load_time_ms": 0.617, "default": {"kl": 0.01341434195637703, "cur_lr": 4.999999873689376e-05, "entropy": 7.816812515258789, "total_loss": 14.266355514526367, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12442073225975037, "vf_explained_var": 0.9879933595657349, "vf_loss": 14.377195358276367}, "grad_time_ms": 775.052}, "pid": 3934253, "time_total_s": 54754.17452979088, "episode_reward_mean": -151.75500545859387, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -163.8275029739961, "policy_reward_mean": {}, "episodes_total": 12840, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.79085044915752, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-49-13", "training_iteration": 535, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756525753, "episode_len_mean": 50.0, "timesteps_since_restore": 642000, "time_since_restore": 54754.17452979088, "time_this_iter_s": 108.88977336883545, "iterations_since_restore": 535}
+{"timesteps_total": 643200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 96548.363, "num_steps_sampled": 643200, "update_time_ms": 2.567, "num_steps_trained": 643200, "load_time_ms": 0.616, "default": {"kl": 0.01187937706708908, "cur_lr": 4.999999873689376e-05, "entropy": 7.564441204071045, "total_loss": 13.592453002929688, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.09899282455444336, "vf_explained_var": 0.9898480176925659, "vf_loss": 13.679417610168457}, "grad_time_ms": 776.95}, "pid": 3934253, "time_total_s": 54856.658707141876, "episode_reward_mean": -151.69387446085312, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -163.9252472156271, "policy_reward_mean": {}, "episodes_total": 12864, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -149.25603792487527, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-50-55", "training_iteration": 536, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756525855, "episode_len_mean": 50.0, "timesteps_since_restore": 643200, "time_since_restore": 54856.658707141876, "time_this_iter_s": 102.48417735099792, "iterations_since_restore": 536}
+{"timesteps_total": 644400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93374.482, "num_steps_sampled": 644400, "update_time_ms": 2.65, "num_steps_trained": 644400, "load_time_ms": 0.617, "default": {"kl": 0.013803391717374325, "cur_lr": 4.999999873689376e-05, "entropy": 7.7664923667907715, "total_loss": 10.183890342712402, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12922273576259613, "vf_explained_var": 0.9913797378540039, "vf_loss": 10.299137115478516}, "grad_time_ms": 774.203}, "pid": 3934253, "time_total_s": 54940.73849415779, "episode_reward_mean": -151.52124492364973, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -163.9252472156271, "policy_reward_mean": {}, "episodes_total": 12888, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -148.7753803736122, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-52-19", "training_iteration": 537, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756525939, "episode_len_mean": 50.0, "timesteps_since_restore": 644400, "time_since_restore": 54940.73849415779, "time_this_iter_s": 84.07978701591492, "iterations_since_restore": 537}
+{"timesteps_total": 645600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93288.352, "num_steps_sampled": 645600, "update_time_ms": 2.676, "num_steps_trained": 645600, "load_time_ms": 0.63, "default": {"kl": 0.01337174791842699, "cur_lr": 4.999999873689376e-05, "entropy": 7.903218746185303, "total_loss": 7.999932289123535, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13875660300254822, "vf_explained_var": 0.993107259273529, "vf_loss": 8.125149726867676}, "grad_time_ms": 775.502}, "pid": 3934253, "time_total_s": 55036.46237754822, "episode_reward_mean": -151.58003477042269, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -163.9252472156271, "policy_reward_mean": {}, "episodes_total": 12912, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -147.62447533124597, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-53-55", "training_iteration": 538, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756526035, "episode_len_mean": 50.0, "timesteps_since_restore": 645600, "time_since_restore": 55036.46237754822, "time_this_iter_s": 95.72388339042664, "iterations_since_restore": 538}
+{"timesteps_total": 646800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94409.314, "num_steps_sampled": 646800, "update_time_ms": 2.543, "num_steps_trained": 646800, "load_time_ms": 0.631, "default": {"kl": 0.011509610339999199, "cur_lr": 4.999999873689376e-05, "entropy": 7.807576656341553, "total_loss": 12.251175880432129, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1061524972319603, "vf_explained_var": 0.9901471734046936, "vf_loss": 12.345675468444824}, "grad_time_ms": 768.137}, "pid": 3934253, "time_total_s": 55148.40368771553, "episode_reward_mean": -151.5295869223695, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -163.9252472156271, "policy_reward_mean": {}, "episodes_total": 12936, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.74760840253305, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-55-47", "training_iteration": 539, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756526147, "episode_len_mean": 50.0, "timesteps_since_restore": 646800, "time_since_restore": 55148.40368771553, "time_this_iter_s": 111.94131016731262, "iterations_since_restore": 539}
+{"timesteps_total": 648000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94642.933, "num_steps_sampled": 648000, "update_time_ms": 2.522, "num_steps_trained": 648000, "load_time_ms": 0.636, "default": {"kl": 0.01324335765093565, "cur_lr": 4.999999873689376e-05, "entropy": 7.777318954467773, "total_loss": 14.313945770263672, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10932556539773941, "vf_explained_var": 0.9886135458946228, "vf_loss": 14.40986442565918}, "grad_time_ms": 775.809}, "pid": 3934253, "time_total_s": 55243.58929491043, "episode_reward_mean": -151.74086113916158, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.98272412453377, "policy_reward_mean": {}, "episodes_total": 12960, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.74760840253305, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-57-22", "training_iteration": 540, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756526242, "episode_len_mean": 50.0, "timesteps_since_restore": 648000, "time_since_restore": 55243.58929491043, "time_this_iter_s": 95.18560719490051, "iterations_since_restore": 540}
+{"timesteps_total": 649200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95782.371, "num_steps_sampled": 649200, "update_time_ms": 2.528, "num_steps_trained": 649200, "load_time_ms": 0.639, "default": {"kl": 0.011101160198450089, "cur_lr": 4.999999873689376e-05, "entropy": 7.8202033042907715, "total_loss": 24.006174087524414, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1093890443444252, "vf_explained_var": 0.9816538691520691, "vf_loss": 24.104326248168945}, "grad_time_ms": 785.284}, "pid": 3934253, "time_total_s": 55328.203587055206, "episode_reward_mean": -151.7548923149819, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.98272412453377, "policy_reward_mean": {}, "episodes_total": 12984, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.9202484174889, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_05-58-47", "training_iteration": 541, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756526327, "episode_len_mean": 50.0, "timesteps_since_restore": 649200, "time_since_restore": 55328.203587055206, "time_this_iter_s": 84.61429214477539, "iterations_since_restore": 541}
+{"timesteps_total": 650400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93046.918, "num_steps_sampled": 650400, "update_time_ms": 2.545, "num_steps_trained": 650400, "load_time_ms": 0.628, "default": {"kl": 0.013351892121136189, "cur_lr": 4.999999873689376e-05, "entropy": 7.874947547912598, "total_loss": 13.548004150390625, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12411337345838547, "vf_explained_var": 0.9898288249969482, "vf_loss": 13.658597946166992}, "grad_time_ms": 789.284}, "pid": 3934253, "time_total_s": 55413.32578778267, "episode_reward_mean": -151.8367207524007, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.98272412453377, "policy_reward_mean": {}, "episodes_total": 13008, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.9202484174889, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-00-12", "training_iteration": 542, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756526412, "episode_len_mean": 50.0, "timesteps_since_restore": 650400, "time_since_restore": 55413.32578778267, "time_this_iter_s": 85.12220072746277, "iterations_since_restore": 542}
+{"timesteps_total": 651600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 97231.22, "num_steps_sampled": 651600, "update_time_ms": 2.556, "num_steps_trained": 651600, "load_time_ms": 0.622, "default": {"kl": 0.014025096781551838, "cur_lr": 4.999999873689376e-05, "entropy": 7.527444839477539, "total_loss": 16.716171264648438, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12651662528514862, "vf_explained_var": 0.9879534840583801, "vf_loss": 16.828487396240234}, "grad_time_ms": 774.998}, "pid": 3934253, "time_total_s": 55536.71937775612, "episode_reward_mean": -152.20230023532304, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.6957739032894, "policy_reward_mean": {}, "episodes_total": 13032, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.9202484174889, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-02-16", "training_iteration": 543, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756526536, "episode_len_mean": 50.0, "timesteps_since_restore": 651600, "time_since_restore": 55536.71937775612, "time_this_iter_s": 123.3935899734497, "iterations_since_restore": 543}
+{"timesteps_total": 652800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 96703.712, "num_steps_sampled": 652800, "update_time_ms": 2.543, "num_steps_trained": 652800, "load_time_ms": 0.633, "default": {"kl": 0.010980258695781231, "cur_lr": 4.999999873689376e-05, "entropy": 7.6602020263671875, "total_loss": 22.34575653076172, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10523133724927902, "vf_explained_var": 0.9839035868644714, "vf_loss": 22.439870834350586}, "grad_time_ms": 767.295}, "pid": 3934253, "time_total_s": 55620.079362392426, "episode_reward_mean": -152.33134642985738, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.6957739032894, "policy_reward_mean": {}, "episodes_total": 13056, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.9202484174889, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-03-39", "training_iteration": 544, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756526619, "episode_len_mean": 50.0, "timesteps_since_restore": 652800, "time_since_restore": 55620.079362392426, "time_this_iter_s": 83.35998463630676, "iterations_since_restore": 544}
+{"timesteps_total": 654000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 96466.977, "num_steps_sampled": 654000, "update_time_ms": 2.52, "num_steps_trained": 654000, "load_time_ms": 0.641, "default": {"kl": 0.011783335357904434, "cur_lr": 4.999999873689376e-05, "entropy": 7.79296875, "total_loss": 40.88715362548828, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10244444757699966, "vf_explained_var": 0.9700209498405457, "vf_loss": 40.9776611328125}, "grad_time_ms": 765.164}, "pid": 3934253, "time_total_s": 55726.58039832115, "episode_reward_mean": -152.20774698978352, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -172.32353041127666, "policy_reward_mean": {}, "episodes_total": 13080, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -149.34913540216635, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-05-25", "training_iteration": 545, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756526725, "episode_len_mean": 50.0, "timesteps_since_restore": 654000, "time_since_restore": 55726.58039832115, "time_this_iter_s": 106.5010359287262, "iterations_since_restore": 545}
+{"timesteps_total": 655200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94113.276, "num_steps_sampled": 655200, "update_time_ms": 2.559, "num_steps_trained": 655200, "load_time_ms": 0.652, "default": {"kl": 0.012306980788707733, "cur_lr": 4.999999873689376e-05, "entropy": 7.727290630340576, "total_loss": 10.178478240966797, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12441620230674744, "vf_explained_var": 0.9918663501739502, "vf_loss": 10.290432929992676}, "grad_time_ms": 773.947}, "pid": 3934253, "time_total_s": 55805.616351127625, "episode_reward_mean": -152.20533783139092, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -172.32353041127666, "policy_reward_mean": {}, "episodes_total": 13104, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.8546421528748, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-06-44", "training_iteration": 546, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756526804, "episode_len_mean": 50.0, "timesteps_since_restore": 655200, "time_since_restore": 55805.616351127625, "time_this_iter_s": 79.03595280647278, "iterations_since_restore": 546}
+{"timesteps_total": 656400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93427.926, "num_steps_sampled": 656400, "update_time_ms": 2.466, "num_steps_trained": 656400, "load_time_ms": 0.643, "default": {"kl": 0.011928428895771503, "cur_lr": 4.999999873689376e-05, "entropy": 7.634652137756348, "total_loss": 16.324533462524414, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10499259829521179, "vf_explained_var": 0.9873420000076294, "vf_loss": 16.417448043823242}, "grad_time_ms": 773.586}, "pid": 3934253, "time_total_s": 55882.83739686012, "episode_reward_mean": -151.81330852174514, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -172.32353041127666, "policy_reward_mean": {}, "episodes_total": 13128, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.8546421528748, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-08-02", "training_iteration": 547, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756526882, "episode_len_mean": 50.0, "timesteps_since_restore": 656400, "time_since_restore": 55882.83739686012, "time_this_iter_s": 77.22104573249817, "iterations_since_restore": 547}
+{"timesteps_total": 657600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94058.335, "num_steps_sampled": 657600, "update_time_ms": 2.362, "num_steps_trained": 657600, "load_time_ms": 0.634, "default": {"kl": 0.011866304092109203, "cur_lr": 4.999999873689376e-05, "entropy": 7.796850204467773, "total_loss": 22.88044548034668, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11830038577318192, "vf_explained_var": 0.9822542667388916, "vf_loss": 22.986730575561523}, "grad_time_ms": 766.724}, "pid": 3934253, "time_total_s": 55984.79539489746, "episode_reward_mean": -151.95285161137662, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -172.32353041127666, "policy_reward_mean": {}, "episodes_total": 13152, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.8546421528748, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-09-44", "training_iteration": 548, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756526984, "episode_len_mean": 50.0, "timesteps_since_restore": 657600, "time_since_restore": 55984.79539489746, "time_this_iter_s": 101.95799803733826, "iterations_since_restore": 548}
+{"timesteps_total": 658800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94776.906, "num_steps_sampled": 658800, "update_time_ms": 2.377, "num_steps_trained": 658800, "load_time_ms": 0.635, "default": {"kl": 0.013383209705352783, "cur_lr": 4.999999873689376e-05, "entropy": 8.047952651977539, "total_loss": 16.600008010864258, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1312945932149887, "vf_explained_var": 0.987158477306366, "vf_loss": 16.71775245666504}, "grad_time_ms": 775.892}, "pid": 3934253, "time_total_s": 56104.01416516304, "episode_reward_mean": -151.82635310445673, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.28430668358237, "policy_reward_mean": {}, "episodes_total": 13176, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.8730050272048, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-11-43", "training_iteration": 549, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756527103, "episode_len_mean": 50.0, "timesteps_since_restore": 658800, "time_since_restore": 56104.01416516304, "time_this_iter_s": 119.21877026557922, "iterations_since_restore": 549}
+{"timesteps_total": 660000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95396.522, "num_steps_sampled": 660000, "update_time_ms": 2.399, "num_steps_trained": 660000, "load_time_ms": 0.631, "default": {"kl": 0.012394605204463005, "cur_lr": 4.999999873689376e-05, "entropy": 7.464038372039795, "total_loss": 13.411670684814453, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11062879860401154, "vf_explained_var": 0.9897158741950989, "vf_loss": 13.509750366210938}, "grad_time_ms": 766.048}, "pid": 3934253, "time_total_s": 56205.296759843826, "episode_reward_mean": -151.7888369248003, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.28430668358237, "policy_reward_mean": {}, "episodes_total": 13200, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.8730050272048, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-13-24", "training_iteration": 550, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756527204, "episode_len_mean": 50.0, "timesteps_since_restore": 660000, "time_since_restore": 56205.296759843826, "time_this_iter_s": 101.28259468078613, "iterations_since_restore": 550}
+{"timesteps_total": 661200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95110.739, "num_steps_sampled": 661200, "update_time_ms": 2.409, "num_steps_trained": 661200, "load_time_ms": 0.633, "default": {"kl": 0.013078085146844387, "cur_lr": 4.999999873689376e-05, "entropy": 7.584109783172607, "total_loss": 15.645307540893555, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11028580367565155, "vf_explained_var": 0.9869313836097717, "vf_loss": 15.742351531982422}, "grad_time_ms": 755.638}, "pid": 3934253, "time_total_s": 56286.949072122574, "episode_reward_mean": -151.88660863617798, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.28430668358237, "policy_reward_mean": {}, "episodes_total": 13224, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.8730050272048, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-14-46", "training_iteration": 551, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756527286, "episode_len_mean": 50.0, "timesteps_since_restore": 661200, "time_since_restore": 56286.949072122574, "time_this_iter_s": 81.65231227874756, "iterations_since_restore": 551}
+{"timesteps_total": 662400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 97874.593, "num_steps_sampled": 662400, "update_time_ms": 2.444, "num_steps_trained": 662400, "load_time_ms": 0.639, "default": {"kl": 0.013991860672831535, "cur_lr": 4.999999873689376e-05, "entropy": 7.786781311035156, "total_loss": 15.687580108642578, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12217790633440018, "vf_explained_var": 0.9863888621330261, "vf_loss": 15.795589447021484}, "grad_time_ms": 749.511}, "pid": 3934253, "time_total_s": 56399.64902329445, "episode_reward_mean": -151.60787893259965, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.2084314069166, "policy_reward_mean": {}, "episodes_total": 13248, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.8730050272048, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-16-39", "training_iteration": 552, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756527399, "episode_len_mean": 50.0, "timesteps_since_restore": 662400, "time_since_restore": 56399.64902329445, "time_this_iter_s": 112.699951171875, "iterations_since_restore": 552}
+{"timesteps_total": 663600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94479.078, "num_steps_sampled": 663600, "update_time_ms": 2.41, "num_steps_trained": 663600, "load_time_ms": 0.65, "default": {"kl": 0.011322933249175549, "cur_lr": 4.999999873689376e-05, "entropy": 7.6803412437438965, "total_loss": 15.122419357299805, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11909367144107819, "vf_explained_var": 0.9879716038703918, "vf_loss": 15.230048179626465}, "grad_time_ms": 762.713}, "pid": 3934253, "time_total_s": 56489.219517707825, "episode_reward_mean": -151.44552527452606, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.2084314069166, "policy_reward_mean": {}, "episodes_total": 13272, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.8730050272048, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-18-08", "training_iteration": 553, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756527488, "episode_len_mean": 50.0, "timesteps_since_restore": 663600, "time_since_restore": 56489.219517707825, "time_this_iter_s": 89.57049441337585, "iterations_since_restore": 553}
+{"timesteps_total": 664800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 96160.343, "num_steps_sampled": 664800, "update_time_ms": 2.455, "num_steps_trained": 664800, "load_time_ms": 0.631, "default": {"kl": 0.0125638572499156, "cur_lr": 4.999999873689376e-05, "entropy": 7.583347797393799, "total_loss": 11.01961612701416, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12256471067667007, "vf_explained_var": 0.9910979866981506, "vf_loss": 11.129459381103516}, "grad_time_ms": 771.023}, "pid": 3934253, "time_total_s": 56589.475972890854, "episode_reward_mean": -151.67157253702425, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.2084314069166, "policy_reward_mean": {}, "episodes_total": 13296, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.75703188287594, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-19-48", "training_iteration": 554, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756527588, "episode_len_mean": 50.0, "timesteps_since_restore": 664800, "time_since_restore": 56589.475972890854, "time_this_iter_s": 100.25645518302917, "iterations_since_restore": 554}
+{"timesteps_total": 666000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 96789.175, "num_steps_sampled": 666000, "update_time_ms": 2.525, "num_steps_trained": 666000, "load_time_ms": 0.631, "default": {"kl": 0.01608692668378353, "cur_lr": 4.999999873689376e-05, "entropy": 8.026680946350098, "total_loss": 14.854989051818848, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13966526091098785, "vf_explained_var": 0.9897687435150146, "vf_loss": 14.978367805480957}, "grad_time_ms": 774.245}, "pid": 3934253, "time_total_s": 56702.29811143875, "episode_reward_mean": -151.56715217271983, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -168.90639455884744, "policy_reward_mean": {}, "episodes_total": 13320, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.36764966395847, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-21-41", "training_iteration": 555, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756527701, "episode_len_mean": 50.0, "timesteps_since_restore": 666000, "time_since_restore": 56702.29811143875, "time_this_iter_s": 112.82213854789734, "iterations_since_restore": 555}
+{"timesteps_total": 667200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 98435.435, "num_steps_sampled": 667200, "update_time_ms": 2.527, "num_steps_trained": 667200, "load_time_ms": 0.62, "default": {"kl": 0.013392424210906029, "cur_lr": 4.999999873689376e-05, "entropy": 7.8170342445373535, "total_loss": 10.752395629882812, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11502734571695328, "vf_explained_var": 0.9913275837898254, "vf_loss": 10.853862762451172}, "grad_time_ms": 770.789}, "pid": 3934253, "time_total_s": 56797.761281490326, "episode_reward_mean": -151.40665633516022, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -168.90639455884744, "policy_reward_mean": {}, "episodes_total": 13344, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.96352003292222, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-23-17", "training_iteration": 556, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756527797, "episode_len_mean": 50.0, "timesteps_since_restore": 667200, "time_since_restore": 56797.761281490326, "time_this_iter_s": 95.4631700515747, "iterations_since_restore": 556}
+{"timesteps_total": 668400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 101548.632, "num_steps_sampled": 668400, "update_time_ms": 2.522, "num_steps_trained": 668400, "load_time_ms": 0.63, "default": {"kl": 0.013542591594159603, "cur_lr": 4.999999873689376e-05, "entropy": 7.707234859466553, "total_loss": 9.735451698303223, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1209249496459961, "vf_explained_var": 0.9922139644622803, "vf_loss": 9.84266471862793}, "grad_time_ms": 777.582}, "pid": 3934253, "time_total_s": 56906.1828122139, "episode_reward_mean": -151.23620579021028, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -168.90639455884744, "policy_reward_mean": {}, "episodes_total": 13368, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.96352003292222, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-25-05", "training_iteration": 557, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756527905, "episode_len_mean": 50.0, "timesteps_since_restore": 668400, "time_since_restore": 56906.1828122139, "time_this_iter_s": 108.42153072357178, "iterations_since_restore": 557}
+{"timesteps_total": 669600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 102200.348, "num_steps_sampled": 669600, "update_time_ms": 2.554, "num_steps_trained": 669600, "load_time_ms": 0.628, "default": {"kl": 0.01427131425589323, "cur_lr": 4.999999873689376e-05, "entropy": 7.790090560913086, "total_loss": 8.442832946777344, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1334570199251175, "vf_explained_var": 0.9929354190826416, "vf_loss": 8.561840057373047}, "grad_time_ms": 762.454}, "pid": 3934253, "time_total_s": 57014.50557184219, "episode_reward_mean": -151.2608926220695, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -168.90639455884744, "policy_reward_mean": {}, "episodes_total": 13392, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.96352003292222, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-26-54", "training_iteration": 558, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756528014, "episode_len_mean": 50.0, "timesteps_since_restore": 669600, "time_since_restore": 57014.50557184219, "time_this_iter_s": 108.3227596282959, "iterations_since_restore": 558}
+{"timesteps_total": 670800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 101061.44, "num_steps_sampled": 670800, "update_time_ms": 2.523, "num_steps_trained": 670800, "load_time_ms": 0.622, "default": {"kl": 0.012106995098292828, "cur_lr": 4.999999873689376e-05, "entropy": 7.880985736846924, "total_loss": 13.949009895324707, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11816269904375076, "vf_explained_var": 0.9890486598014832, "vf_loss": 14.054914474487305}, "grad_time_ms": 753.675}, "pid": 3934253, "time_total_s": 57122.24686527252, "episode_reward_mean": -151.31124070736968, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.1500952171991, "policy_reward_mean": {}, "episodes_total": 13416, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.96352003292222, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-28-41", "training_iteration": 559, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756528121, "episode_len_mean": 50.0, "timesteps_since_restore": 670800, "time_since_restore": 57122.24686527252, "time_this_iter_s": 107.74129343032837, "iterations_since_restore": 559}
+{"timesteps_total": 672000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 99077.936, "num_steps_sampled": 672000, "update_time_ms": 2.475, "num_steps_trained": 672000, "load_time_ms": 0.625, "default": {"kl": 0.012361031025648117, "cur_lr": 4.999999873689376e-05, "entropy": 7.896833419799805, "total_loss": 17.845319747924805, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1219933032989502, "vf_explained_var": 0.9871928691864014, "vf_loss": 17.954797744750977}, "grad_time_ms": 751.704}, "pid": 3934253, "time_total_s": 57203.67510128021, "episode_reward_mean": -151.56446937536896, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.47141499845398, "policy_reward_mean": {}, "episodes_total": 13440, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -143.1663559505958, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-30-03", "training_iteration": 560, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756528203, "episode_len_mean": 50.0, "timesteps_since_restore": 672000, "time_since_restore": 57203.67510128021, "time_this_iter_s": 81.42823600769043, "iterations_since_restore": 560}
+{"timesteps_total": 673200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 101133.18, "num_steps_sampled": 673200, "update_time_ms": 2.442, "num_steps_trained": 673200, "load_time_ms": 0.618, "default": {"kl": 0.012184562161564827, "cur_lr": 4.999999873689376e-05, "entropy": 7.8777337074279785, "total_loss": 17.04519271850586, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12136489897966385, "vf_explained_var": 0.9857383370399475, "vf_loss": 17.154220581054688}, "grad_time_ms": 763.265}, "pid": 3934253, "time_total_s": 57305.99560403824, "episode_reward_mean": -151.73232247828938, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.47141499845398, "policy_reward_mean": {}, "episodes_total": 13464, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.79079619262694, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-31-45", "training_iteration": 561, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756528305, "episode_len_mean": 50.0, "timesteps_since_restore": 673200, "time_since_restore": 57305.99560403824, "time_this_iter_s": 102.32050275802612, "iterations_since_restore": 561}
+{"timesteps_total": 674400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 100718.488, "num_steps_sampled": 674400, "update_time_ms": 2.403, "num_steps_trained": 674400, "load_time_ms": 0.617, "default": {"kl": 0.012234192341566086, "cur_lr": 4.999999873689376e-05, "entropy": 7.9072394371032715, "total_loss": 12.1405668258667, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12168225646018982, "vf_explained_var": 0.9896350502967834, "vf_loss": 12.249862670898438}, "grad_time_ms": 763.304}, "pid": 3934253, "time_total_s": 57414.548646211624, "episode_reward_mean": -151.86656038831188, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.47141499845398, "policy_reward_mean": {}, "episodes_total": 13488, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.79079619262694, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-33-34", "training_iteration": 562, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756528414, "episode_len_mean": 50.0, "timesteps_since_restore": 674400, "time_since_restore": 57414.548646211624, "time_this_iter_s": 108.55304217338562, "iterations_since_restore": 562}
+{"timesteps_total": 675600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 100828.693, "num_steps_sampled": 675600, "update_time_ms": 2.446, "num_steps_trained": 675600, "load_time_ms": 0.612, "default": {"kl": 0.011146489530801773, "cur_lr": 4.999999873689376e-05, "entropy": 7.461226463317871, "total_loss": 10.990604400634766, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12205490469932556, "vf_explained_var": 0.9911925792694092, "vf_loss": 11.101373672485352}, "grad_time_ms": 756.709}, "pid": 3934253, "time_total_s": 57505.155586481094, "episode_reward_mean": -151.8312616787746, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.17639154659727, "policy_reward_mean": {}, "episodes_total": 13512, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -135.40392465635645, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-35-04", "training_iteration": 563, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756528504, "episode_len_mean": 50.0, "timesteps_since_restore": 675600, "time_since_restore": 57505.155586481094, "time_this_iter_s": 90.60694026947021, "iterations_since_restore": 563}
+{"timesteps_total": 676800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 99875.662, "num_steps_sampled": 676800, "update_time_ms": 2.389, "num_steps_trained": 676800, "load_time_ms": 0.615, "default": {"kl": 0.013896778225898743, "cur_lr": 4.999999873689376e-05, "entropy": 7.457207679748535, "total_loss": 11.320537567138672, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13056457042694092, "vf_explained_var": 0.9907848834991455, "vf_loss": 11.437031745910645}, "grad_time_ms": 762.526}, "pid": 3934253, "time_total_s": 57595.93927574158, "episode_reward_mean": -152.12418592379265, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -168.22577448549237, "policy_reward_mean": {}, "episodes_total": 13536, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -135.40392465635645, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-36-35", "training_iteration": 564, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756528595, "episode_len_mean": 50.0, "timesteps_since_restore": 676800, "time_since_restore": 57595.93927574158, "time_this_iter_s": 90.78368926048279, "iterations_since_restore": 564}
+{"timesteps_total": 678000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 100426.129, "num_steps_sampled": 678000, "update_time_ms": 2.36, "num_steps_trained": 678000, "load_time_ms": 0.613, "default": {"kl": 0.011276878416538239, "cur_lr": 4.999999873689376e-05, "entropy": 7.278744220733643, "total_loss": 16.736454010009766, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11820343136787415, "vf_explained_var": 0.9870368242263794, "vf_loss": 16.843238830566406}, "grad_time_ms": 748.383}, "pid": 3934253, "time_total_s": 57714.12493252754, "episode_reward_mean": -152.0202009410142, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -168.22577448549237, "policy_reward_mean": {}, "episodes_total": 13560, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -135.40392465635645, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-38-33", "training_iteration": 565, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756528713, "episode_len_mean": 50.0, "timesteps_since_restore": 678000, "time_since_restore": 57714.12493252754, "time_this_iter_s": 118.18565678596497, "iterations_since_restore": 565}
+{"timesteps_total": 679200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 101492.584, "num_steps_sampled": 679200, "update_time_ms": 2.34, "num_steps_trained": 679200, "load_time_ms": 0.615, "default": {"kl": 0.014653812162578106, "cur_lr": 4.999999873689376e-05, "entropy": 7.344961166381836, "total_loss": 20.03702735900879, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11613241583108902, "vf_explained_var": 0.9865771532058716, "vf_loss": 20.138322830200195}, "grad_time_ms": 735.91}, "pid": 3934253, "time_total_s": 57820.13002371788, "episode_reward_mean": -151.91582745968978, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -168.22577448549237, "policy_reward_mean": {}, "episodes_total": 13584, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -135.40392465635645, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-40-19", "training_iteration": 566, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756528819, "episode_len_mean": 50.0, "timesteps_since_restore": 679200, "time_since_restore": 57820.13002371788, "time_this_iter_s": 106.00509119033813, "iterations_since_restore": 566}
+{"timesteps_total": 680400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 98846.678, "num_steps_sampled": 680400, "update_time_ms": 2.359, "num_steps_trained": 680400, "load_time_ms": 0.615, "default": {"kl": 0.011863755993545055, "cur_lr": 4.999999873689376e-05, "entropy": 7.693569660186768, "total_loss": 20.87421226501465, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11812932044267654, "vf_explained_var": 0.9834575653076172, "vf_loss": 20.98032569885254}, "grad_time_ms": 732.276}, "pid": 3934253, "time_total_s": 57902.05630970001, "episode_reward_mean": -151.84447129846183, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -168.22577448549237, "policy_reward_mean": {}, "episodes_total": 13608, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.66634416044175, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-41-41", "training_iteration": 567, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756528901, "episode_len_mean": 50.0, "timesteps_since_restore": 680400, "time_since_restore": 57902.05630970001, "time_this_iter_s": 81.92628598213196, "iterations_since_restore": 567}
+{"timesteps_total": 681600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 97790.357, "num_steps_sampled": 681600, "update_time_ms": 2.397, "num_steps_trained": 681600, "load_time_ms": 0.616, "default": {"kl": 0.011216883547604084, "cur_lr": 4.999999873689376e-05, "entropy": 7.75992488861084, "total_loss": 16.491910934448242, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12139460444450378, "vf_explained_var": 0.98785400390625, "vf_loss": 16.601947784423828}, "grad_time_ms": 753.3}, "pid": 3934253, "time_total_s": 58000.02692985535, "episode_reward_mean": -151.5986989681715, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.57318712299187, "policy_reward_mean": {}, "episodes_total": 13632, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.99448377052, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-43-19", "training_iteration": 568, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756528999, "episode_len_mean": 50.0, "timesteps_since_restore": 681600, "time_since_restore": 58000.02692985535, "time_this_iter_s": 97.97062015533447, "iterations_since_restore": 568}
+{"timesteps_total": 682800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 98730.33, "num_steps_sampled": 682800, "update_time_ms": 2.389, "num_steps_trained": 682800, "load_time_ms": 0.62, "default": {"kl": 0.011672453954815865, "cur_lr": 4.999999873689376e-05, "entropy": 7.440184593200684, "total_loss": 13.130718231201172, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10527868568897247, "vf_explained_var": 0.9891159534454346, "vf_loss": 13.224178314208984}, "grad_time_ms": 760.107}, "pid": 3934253, "time_total_s": 58117.23666000366, "episode_reward_mean": -151.80092093432214, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.57318712299187, "policy_reward_mean": {}, "episodes_total": 13656, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.99448377052, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-45-16", "training_iteration": 569, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756529116, "episode_len_mean": 50.0, "timesteps_since_restore": 682800, "time_since_restore": 58117.23666000366, "time_this_iter_s": 117.20973014831543, "iterations_since_restore": 569}
+{"timesteps_total": 684000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 97885.303, "num_steps_sampled": 684000, "update_time_ms": 2.389, "num_steps_trained": 684000, "load_time_ms": 0.614, "default": {"kl": 0.014902864582836628, "cur_lr": 4.999999873689376e-05, "entropy": 7.657007694244385, "total_loss": 12.331796646118164, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1290094405412674, "vf_explained_var": 0.989945650100708, "vf_loss": 12.445716857910156}, "grad_time_ms": 756.273}, "pid": 3934253, "time_total_s": 58190.176151037216, "episode_reward_mean": -152.00770656228394, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.57318712299187, "policy_reward_mean": {}, "episodes_total": 13680, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.99448377052, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-46-29", "training_iteration": 570, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756529189, "episode_len_mean": 50.0, "timesteps_since_restore": 684000, "time_since_restore": 58190.176151037216, "time_this_iter_s": 72.93949103355408, "iterations_since_restore": 570}
+{"timesteps_total": 685200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94176.655, "num_steps_sampled": 685200, "update_time_ms": 2.514, "num_steps_trained": 685200, "load_time_ms": 0.617, "default": {"kl": 0.013572430238127708, "cur_lr": 4.999999873689376e-05, "entropy": 7.551823139190674, "total_loss": 8.981759071350098, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13007181882858276, "vf_explained_var": 0.9922204613685608, "vf_loss": 9.098089218139648}, "grad_time_ms": 753.349}, "pid": 3934253, "time_total_s": 58255.38171863556, "episode_reward_mean": -151.82728255358478, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.57318712299187, "policy_reward_mean": {}, "episodes_total": 13704, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.99448377052, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-47-35", "training_iteration": 571, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756529255, "episode_len_mean": 50.0, "timesteps_since_restore": 685200, "time_since_restore": 58255.38171863556, "time_this_iter_s": 65.2055675983429, "iterations_since_restore": 571}
+{"timesteps_total": 686400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92049.621, "num_steps_sampled": 686400, "update_time_ms": 2.474, "num_steps_trained": 686400, "load_time_ms": 0.621, "default": {"kl": 0.013356123119592667, "cur_lr": 4.999999873689376e-05, "entropy": 7.2052764892578125, "total_loss": 12.253599166870117, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10353487730026245, "vf_explained_var": 0.9895342588424683, "vf_loss": 12.343612670898438}, "grad_time_ms": 753.076}, "pid": 3934253, "time_total_s": 58342.66126012802, "episode_reward_mean": -151.84706925774134, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.6289682061747, "policy_reward_mean": {}, "episodes_total": 13728, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -146.64909800243484, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-49-02", "training_iteration": 572, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756529342, "episode_len_mean": 50.0, "timesteps_since_restore": 686400, "time_since_restore": 58342.66126012802, "time_this_iter_s": 87.27954149246216, "iterations_since_restore": 572}
+{"timesteps_total": 687600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92856.237, "num_steps_sampled": 687600, "update_time_ms": 2.458, "num_steps_trained": 687600, "load_time_ms": 0.614, "default": {"kl": 0.012467894703149796, "cur_lr": 4.999999873689376e-05, "entropy": 7.665492534637451, "total_loss": 12.708492279052734, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11531029641628265, "vf_explained_var": 0.9901459217071533, "vf_loss": 12.811178207397461}, "grad_time_ms": 761.337}, "pid": 3934253, "time_total_s": 58441.416241407394, "episode_reward_mean": -151.3989287948314, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -160.94876140781466, "policy_reward_mean": {}, "episodes_total": 13752, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.5995533319289, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-50-41", "training_iteration": 573, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756529441, "episode_len_mean": 50.0, "timesteps_since_restore": 687600, "time_since_restore": 58441.416241407394, "time_this_iter_s": 98.75498127937317, "iterations_since_restore": 573}
+{"timesteps_total": 688800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92962.609, "num_steps_sampled": 688800, "update_time_ms": 2.469, "num_steps_trained": 688800, "load_time_ms": 0.611, "default": {"kl": 0.012100116349756718, "cur_lr": 4.999999873689376e-05, "entropy": 7.34058141708374, "total_loss": 18.82788848876953, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11172984540462494, "vf_explained_var": 0.9857650399208069, "vf_loss": 18.927371978759766}, "grad_time_ms": 728.025}, "pid": 3934253, "time_total_s": 58532.93010187149, "episode_reward_mean": -151.50244165879874, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -161.4437523974731, "policy_reward_mean": {}, "episodes_total": 13776, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.5995533319289, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-52-12", "training_iteration": 574, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756529532, "episode_len_mean": 50.0, "timesteps_since_restore": 688800, "time_since_restore": 58532.93010187149, "time_this_iter_s": 91.51386046409607, "iterations_since_restore": 574}
+{"timesteps_total": 690000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 90303.556, "num_steps_sampled": 690000, "update_time_ms": 2.483, "num_steps_trained": 690000, "load_time_ms": 0.61, "default": {"kl": 0.012349085882306099, "cur_lr": 4.999999873689376e-05, "entropy": 7.143519878387451, "total_loss": 17.44886016845703, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11287827044725418, "vf_explained_var": 0.9857209324836731, "vf_loss": 17.549238204956055}, "grad_time_ms": 718.251}, "pid": 3934253, "time_total_s": 58624.427540779114, "episode_reward_mean": -151.5061001221446, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -161.4437523974731, "policy_reward_mean": {}, "episodes_total": 13800, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.5995533319289, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-53-44", "training_iteration": 575, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756529624, "episode_len_mean": 50.0, "timesteps_since_restore": 690000, "time_since_restore": 58624.427540779114, "time_this_iter_s": 91.49743890762329, "iterations_since_restore": 575}
+{"timesteps_total": 691200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 91299.366, "num_steps_sampled": 691200, "update_time_ms": 2.473, "num_steps_trained": 691200, "load_time_ms": 0.617, "default": {"kl": 0.011632119305431843, "cur_lr": 4.999999873689376e-05, "entropy": 7.450540542602539, "total_loss": 18.479217529296875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.113397017121315, "vf_explained_var": 0.9858831763267517, "vf_loss": 18.58083724975586}, "grad_time_ms": 718.423}, "pid": 3934253, "time_total_s": 58740.391570568085, "episode_reward_mean": -151.6670901938319, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.29363151307973, "policy_reward_mean": {}, "episodes_total": 13824, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.5995533319289, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-55-40", "training_iteration": 576, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756529740, "episode_len_mean": 50.0, "timesteps_since_restore": 691200, "time_since_restore": 58740.391570568085, "time_this_iter_s": 115.96402978897095, "iterations_since_restore": 576}
+{"timesteps_total": 692400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 90758.536, "num_steps_sampled": 692400, "update_time_ms": 2.517, "num_steps_trained": 692400, "load_time_ms": 0.618, "default": {"kl": 0.014467747882008553, "cur_lr": 4.999999873689376e-05, "entropy": 7.817798137664795, "total_loss": 15.944793701171875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13392749428749084, "vf_explained_var": 0.9877843856811523, "vf_loss": 16.06407356262207}, "grad_time_ms": 723.72}, "pid": 3934253, "time_total_s": 58816.963297605515, "episode_reward_mean": -151.96521562869458, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.04381562923297, "policy_reward_mean": {}, "episodes_total": 13848, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.44307414123705, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-56-56", "training_iteration": 577, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756529816, "episode_len_mean": 50.0, "timesteps_since_restore": 692400, "time_since_restore": 58816.963297605515, "time_this_iter_s": 76.57172703742981, "iterations_since_restore": 577}
+{"timesteps_total": 693600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93282.483, "num_steps_sampled": 693600, "update_time_ms": 2.52, "num_steps_trained": 693600, "load_time_ms": 0.627, "default": {"kl": 0.01336402352899313, "cur_lr": 4.999999873689376e-05, "entropy": 7.232810974121094, "total_loss": 22.106884002685547, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11316641420125961, "vf_explained_var": 0.9828341007232666, "vf_loss": 22.206520080566406}, "grad_time_ms": 715.479}, "pid": 3934253, "time_total_s": 58940.09111189842, "episode_reward_mean": -151.83281265991272, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.04381562923297, "policy_reward_mean": {}, "episodes_total": 13872, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -140.01545140863857, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_06-58-59", "training_iteration": 578, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756529939, "episode_len_mean": 50.0, "timesteps_since_restore": 693600, "time_since_restore": 58940.09111189842, "time_this_iter_s": 123.12781429290771, "iterations_since_restore": 578}
+{"timesteps_total": 694800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 90937.401, "num_steps_sampled": 694800, "update_time_ms": 2.566, "num_steps_trained": 694800, "load_time_ms": 0.63, "default": {"kl": 0.012695417739450932, "cur_lr": 4.999999873689376e-05, "entropy": 7.237936973571777, "total_loss": 11.189031600952148, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10490371286869049, "vf_explained_var": 0.9903163909912109, "vf_loss": 11.281082153320312}, "grad_time_ms": 726.687}, "pid": 3934253, "time_total_s": 59033.962436914444, "episode_reward_mean": -151.86428952841857, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.04381562923297, "policy_reward_mean": {}, "episodes_total": 13896, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -140.01545140863857, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-00-33", "training_iteration": 579, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756530033, "episode_len_mean": 50.0, "timesteps_since_restore": 694800, "time_since_restore": 59033.962436914444, "time_this_iter_s": 93.87132501602173, "iterations_since_restore": 579}
+{"timesteps_total": 696000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92486.912, "num_steps_sampled": 696000, "update_time_ms": 2.619, "num_steps_trained": 696000, "load_time_ms": 0.632, "default": {"kl": 0.010564768686890602, "cur_lr": 4.999999873689376e-05, "entropy": 7.240657329559326, "total_loss": 28.40843391418457, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10259688645601273, "vf_explained_var": 0.9848769307136536, "vf_loss": 28.50033187866211}, "grad_time_ms": 727.581}, "pid": 3934253, "time_total_s": 59122.40687298775, "episode_reward_mean": -152.01809617008124, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.04381562923297, "policy_reward_mean": {}, "episodes_total": 13920, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -140.01545140863857, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-02-02", "training_iteration": 580, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756530122, "episode_len_mean": 50.0, "timesteps_since_restore": 696000, "time_since_restore": 59122.40687298775, "time_this_iter_s": 88.44443607330322, "iterations_since_restore": 580}
+{"timesteps_total": 697200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94174.302, "num_steps_sampled": 697200, "update_time_ms": 2.676, "num_steps_trained": 697200, "load_time_ms": 0.627, "default": {"kl": 0.012403911910951138, "cur_lr": 4.999999873689376e-05, "entropy": 7.399474143981934, "total_loss": 14.142861366271973, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10619829595088959, "vf_explained_var": 0.9894328713417053, "vf_loss": 14.23650074005127}, "grad_time_ms": 729.301}, "pid": 3934253, "time_total_s": 59204.50434041023, "episode_reward_mean": -152.04174332906396, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.16394158770373, "policy_reward_mean": {}, "episodes_total": 13944, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -140.01545140863857, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-03-24", "training_iteration": 581, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756530204, "episode_len_mean": 50.0, "timesteps_since_restore": 697200, "time_since_restore": 59204.50434041023, "time_this_iter_s": 82.09746742248535, "iterations_since_restore": 581}
+{"timesteps_total": 698400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93912.092, "num_steps_sampled": 698400, "update_time_ms": 2.694, "num_steps_trained": 698400, "load_time_ms": 0.63, "default": {"kl": 0.012006421573460102, "cur_lr": 4.999999873689376e-05, "entropy": 7.297507286071777, "total_loss": 15.31088924407959, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.09045369178056717, "vf_explained_var": 0.9878251552581787, "vf_loss": 15.389185905456543}, "grad_time_ms": 717.738}, "pid": 3934253, "time_total_s": 59289.04621386528, "episode_reward_mean": -152.30105653435592, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.16394158770373, "policy_reward_mean": {}, "episodes_total": 13968, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -149.0772481269036, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-04-49", "training_iteration": 582, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756530289, "episode_len_mean": 50.0, "timesteps_since_restore": 698400, "time_since_restore": 59289.04621386528, "time_this_iter_s": 84.54187345504761, "iterations_since_restore": 582}
+{"timesteps_total": 699600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 91703.455, "num_steps_sampled": 699600, "update_time_ms": 2.692, "num_steps_trained": 699600, "load_time_ms": 0.633, "default": {"kl": 0.013890719972550869, "cur_lr": 4.999999873689376e-05, "entropy": 7.63686466217041, "total_loss": 16.806406021118164, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13375477492809296, "vf_explained_var": 0.9869916439056396, "vf_loss": 16.926095962524414}, "grad_time_ms": 716.911}, "pid": 3934253, "time_total_s": 59365.7064769268, "episode_reward_mean": -152.14242325846607, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.10500275666027, "policy_reward_mean": {}, "episodes_total": 13992, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.06966000406916, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-06-05", "training_iteration": 583, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756530365, "episode_len_mean": 50.0, "timesteps_since_restore": 699600, "time_since_restore": 59365.7064769268, "time_this_iter_s": 76.66026306152344, "iterations_since_restore": 583}
+{"timesteps_total": 700800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92539.701, "num_steps_sampled": 700800, "update_time_ms": 2.683, "num_steps_trained": 700800, "load_time_ms": 0.632, "default": {"kl": 0.012830524705350399, "cur_lr": 4.999999873689376e-05, "entropy": 7.533829689025879, "total_loss": 16.497915267944336, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11289564520120621, "vf_explained_var": 0.9874927997589111, "vf_loss": 16.59781837463379}, "grad_time_ms": 720.897}, "pid": 3934253, "time_total_s": 59465.62331390381, "episode_reward_mean": -151.95782594633437, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.10500275666027, "policy_reward_mean": {}, "episodes_total": 14016, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.06966000406916, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-07-45", "training_iteration": 584, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756530465, "episode_len_mean": 50.0, "timesteps_since_restore": 700800, "time_since_restore": 59465.62331390381, "time_this_iter_s": 99.916836977005, "iterations_since_restore": 584}
+{"timesteps_total": 702000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93571.579, "num_steps_sampled": 702000, "update_time_ms": 2.642, "num_steps_trained": 702000, "load_time_ms": 0.636, "default": {"kl": 0.01353020966053009, "cur_lr": 4.999999873689376e-05, "entropy": 7.338387489318848, "total_loss": 13.563823699951172, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11937059462070465, "vf_explained_var": 0.9893013834953308, "vf_loss": 13.66949462890625}, "grad_time_ms": 732.656}, "pid": 3934253, "time_total_s": 59567.556359767914, "episode_reward_mean": -151.9279004109191, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.94330068728993, "policy_reward_mean": {}, "episodes_total": 14040, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.05374428274698, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-09-27", "training_iteration": 585, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756530567, "episode_len_mean": 50.0, "timesteps_since_restore": 702000, "time_since_restore": 59567.556359767914, "time_this_iter_s": 101.93304586410522, "iterations_since_restore": 585}
+{"timesteps_total": 703200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92809.295, "num_steps_sampled": 703200, "update_time_ms": 2.631, "num_steps_trained": 703200, "load_time_ms": 0.632, "default": {"kl": 0.013093508780002594, "cur_lr": 4.999999873689376e-05, "entropy": 7.247652053833008, "total_loss": 9.93628978729248, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12289997935295105, "vf_explained_var": 0.9934365749359131, "vf_loss": 10.04593276977539}, "grad_time_ms": 725.034}, "pid": 3934253, "time_total_s": 59675.8199942112, "episode_reward_mean": -151.74111855739798, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.94330068728993, "policy_reward_mean": {}, "episodes_total": 14064, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.05374428274698, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-11-15", "training_iteration": 586, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756530675, "episode_len_mean": 50.0, "timesteps_since_restore": 703200, "time_since_restore": 59675.8199942112, "time_this_iter_s": 108.26363444328308, "iterations_since_restore": 586}
+{"timesteps_total": 704400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93204.657, "num_steps_sampled": 704400, "update_time_ms": 2.598, "num_steps_trained": 704400, "load_time_ms": 0.626, "default": {"kl": 0.011846650391817093, "cur_lr": 4.999999873689376e-05, "entropy": 7.4674248695373535, "total_loss": 8.97598934173584, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12297610193490982, "vf_explained_var": 0.992843747138977, "vf_loss": 9.086971282958984}, "grad_time_ms": 686.738}, "pid": 3934253, "time_total_s": 59755.96127986908, "episode_reward_mean": -151.67899424222992, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.94330068728993, "policy_reward_mean": {}, "episodes_total": 14088, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.05374428274698, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-12-35", "training_iteration": 587, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756530755, "episode_len_mean": 50.0, "timesteps_since_restore": 704400, "time_since_restore": 59755.96127986908, "time_this_iter_s": 80.14128565788269, "iterations_since_restore": 587}
+{"timesteps_total": 705600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 90622.339, "num_steps_sampled": 705600, "update_time_ms": 2.583, "num_steps_trained": 705600, "load_time_ms": 0.621, "default": {"kl": 0.012162242084741592, "cur_lr": 4.999999873689376e-05, "entropy": 7.1651153564453125, "total_loss": 10.699304580688477, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.09133824706077576, "vf_explained_var": 0.9913658499717712, "vf_loss": 10.778327941894531}, "grad_time_ms": 690.449}, "pid": 3934253, "time_total_s": 59853.30315685272, "episode_reward_mean": -151.6922593391394, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.94330068728993, "policy_reward_mean": {}, "episodes_total": 14112, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.05374428274698, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-14-13", "training_iteration": 588, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756530853, "episode_len_mean": 50.0, "timesteps_since_restore": 705600, "time_since_restore": 59853.30315685272, "time_this_iter_s": 97.34187698364258, "iterations_since_restore": 588}
+{"timesteps_total": 706800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92077.509, "num_steps_sampled": 706800, "update_time_ms": 2.504, "num_steps_trained": 706800, "load_time_ms": 0.65, "default": {"kl": 0.01409607008099556, "cur_lr": 4.999999873689376e-05, "entropy": 7.442818641662598, "total_loss": 8.390382766723633, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12273070216178894, "vf_explained_var": 0.994513213634491, "vf_loss": 8.49884033203125}, "grad_time_ms": 678.541}, "pid": 3934253, "time_total_s": 59961.60695576668, "episode_reward_mean": -151.50156964718323, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.34163108568424, "policy_reward_mean": {}, "episodes_total": 14136, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.81389860999062, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-16-01", "training_iteration": 589, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756530961, "episode_len_mean": 50.0, "timesteps_since_restore": 706800, "time_since_restore": 59961.60695576668, "time_this_iter_s": 108.30379891395569, "iterations_since_restore": 589}
+{"timesteps_total": 708000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94196.583, "num_steps_sampled": 708000, "update_time_ms": 2.523, "num_steps_trained": 708000, "load_time_ms": 0.674, "default": {"kl": 0.012521314434707165, "cur_lr": 4.999999873689376e-05, "entropy": 7.323137283325195, "total_loss": 10.42292308807373, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13767369091510773, "vf_explained_var": 0.9919801354408264, "vf_loss": 10.547918319702148}, "grad_time_ms": 678.715}, "pid": 3934253, "time_total_s": 60071.24426102638, "episode_reward_mean": -151.5101446258732, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.03042833185478, "policy_reward_mean": {}, "episodes_total": 14160, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -140.0657561986548, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-17-51", "training_iteration": 590, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756531071, "episode_len_mean": 50.0, "timesteps_since_restore": 708000, "time_since_restore": 60071.24426102638, "time_this_iter_s": 109.63730525970459, "iterations_since_restore": 590}
+{"timesteps_total": 709200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 96082.76, "num_steps_sampled": 709200, "update_time_ms": 2.382, "num_steps_trained": 709200, "load_time_ms": 0.687, "default": {"kl": 0.011179720051586628, "cur_lr": 4.999999873689376e-05, "entropy": 6.874238967895508, "total_loss": 7.746560096740723, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12053953111171722, "vf_explained_var": 0.993653416633606, "vf_loss": 7.85577917098999}, "grad_time_ms": 673.693}, "pid": 3934253, "time_total_s": 60172.15154004097, "episode_reward_mean": -151.75990299087707, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.29179124485003, "policy_reward_mean": {}, "episodes_total": 14184, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.96449797766664, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-19-32", "training_iteration": 591, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756531172, "episode_len_mean": 50.0, "timesteps_since_restore": 709200, "time_since_restore": 60172.15154004097, "time_this_iter_s": 100.9072790145874, "iterations_since_restore": 591}
+{"timesteps_total": 710400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 98768.234, "num_steps_sampled": 710400, "update_time_ms": 2.353, "num_steps_trained": 710400, "load_time_ms": 0.697, "default": {"kl": 0.011559142731130123, "cur_lr": 4.999999873689376e-05, "entropy": 7.005263328552246, "total_loss": 9.97242546081543, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11214432120323181, "vf_explained_var": 0.9928156733512878, "vf_loss": 10.07286548614502}, "grad_time_ms": 679.973}, "pid": 3934253, "time_total_s": 60283.61056137085, "episode_reward_mean": -151.78678105090998, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.29179124485003, "policy_reward_mean": {}, "episodes_total": 14208, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.96449797766664, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-21-23", "training_iteration": 592, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756531283, "episode_len_mean": 50.0, "timesteps_since_restore": 710400, "time_since_restore": 60283.61056137085, "time_this_iter_s": 111.45902132987976, "iterations_since_restore": 592}
+{"timesteps_total": 711600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 99394.054, "num_steps_sampled": 711600, "update_time_ms": 2.327, "num_steps_trained": 711600, "load_time_ms": 0.699, "default": {"kl": 0.01326974667608738, "cur_lr": 4.999999873689376e-05, "entropy": 7.2245941162109375, "total_loss": 13.130340576171875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1341947764158249, "vf_explained_var": 0.9907307028770447, "vf_loss": 13.251100540161133}, "grad_time_ms": 673.367}, "pid": 3934253, "time_total_s": 60366.462671756744, "episode_reward_mean": -151.6194461096379, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.29179124485003, "policy_reward_mean": {}, "episodes_total": 14232, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.96449797766664, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-22-46", "training_iteration": 593, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756531366, "episode_len_mean": 50.0, "timesteps_since_restore": 711600, "time_since_restore": 60366.462671756744, "time_this_iter_s": 82.85211038589478, "iterations_since_restore": 593}
+{"timesteps_total": 712800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 99127.994, "num_steps_sampled": 712800, "update_time_ms": 2.34, "num_steps_trained": 712800, "load_time_ms": 0.702, "default": {"kl": 0.013715913519263268, "cur_lr": 4.999999873689376e-05, "entropy": 7.543368816375732, "total_loss": 12.581001281738281, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1392856240272522, "vf_explained_var": 0.9903258085250854, "vf_loss": 12.706399917602539}, "grad_time_ms": 693.584}, "pid": 3934253, "time_total_s": 60463.921142578125, "episode_reward_mean": -151.841262826727, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.29179124485003, "policy_reward_mean": {}, "episodes_total": 14256, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -135.49370618230293, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-24-24", "training_iteration": 594, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756531464, "episode_len_mean": 50.0, "timesteps_since_restore": 712800, "time_since_restore": 60463.921142578125, "time_this_iter_s": 97.45847082138062, "iterations_since_restore": 594}
+{"timesteps_total": 714000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 97260.312, "num_steps_sampled": 714000, "update_time_ms": 2.389, "num_steps_trained": 714000, "load_time_ms": 0.696, "default": {"kl": 0.014189370907843113, "cur_lr": 4.999999873689376e-05, "entropy": 7.186726093292236, "total_loss": 13.266934394836426, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11609578132629395, "vf_explained_var": 0.9903583526611328, "vf_loss": 13.368663787841797}, "grad_time_ms": 700.24}, "pid": 3934253, "time_total_s": 60547.2446205616, "episode_reward_mean": -151.6338203087051, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.4763763376484, "policy_reward_mean": {}, "episodes_total": 14280, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -135.49370618230293, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-25-47", "training_iteration": 595, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756531547, "episode_len_mean": 50.0, "timesteps_since_restore": 714000, "time_since_restore": 60547.2446205616, "time_this_iter_s": 83.32347798347473, "iterations_since_restore": 595}
+{"timesteps_total": 715200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95154.528, "num_steps_sampled": 715200, "update_time_ms": 2.427, "num_steps_trained": 715200, "load_time_ms": 0.691, "default": {"kl": 0.011939617805182934, "cur_lr": 4.999999873689376e-05, "entropy": 6.9514336585998535, "total_loss": 15.354241371154785, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1097557544708252, "vf_explained_var": 0.9887726306915283, "vf_loss": 15.451909065246582}, "grad_time_ms": 726.238}, "pid": 3934253, "time_total_s": 60634.71063876152, "episode_reward_mean": -151.64690577669663, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.4763763376484, "policy_reward_mean": {}, "episodes_total": 14304, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -135.49370618230293, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-27-14", "training_iteration": 596, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756531634, "episode_len_mean": 50.0, "timesteps_since_restore": 715200, "time_since_restore": 60634.71063876152, "time_this_iter_s": 87.46601819992065, "iterations_since_restore": 596}
+{"timesteps_total": 716400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93652.735, "num_steps_sampled": 716400, "update_time_ms": 2.603, "num_steps_trained": 716400, "load_time_ms": 0.696, "default": {"kl": 0.010730365291237831, "cur_lr": 4.999999873689376e-05, "entropy": 7.129854202270508, "total_loss": 18.592544555664062, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10800933092832565, "vf_explained_var": 0.9868574142456055, "vf_loss": 18.68968963623047}, "grad_time_ms": 760.154}, "pid": 3934253, "time_total_s": 60700.175520420074, "episode_reward_mean": -151.62646291803293, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.4763763376484, "policy_reward_mean": {}, "episodes_total": 14328, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -135.49370618230293, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-28-20", "training_iteration": 597, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756531700, "episode_len_mean": 50.0, "timesteps_since_restore": 716400, "time_since_restore": 60700.175520420074, "time_this_iter_s": 65.46488165855408, "iterations_since_restore": 597}
+{"timesteps_total": 717600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92106.567, "num_steps_sampled": 717600, "update_time_ms": 2.634, "num_steps_trained": 717600, "load_time_ms": 0.695, "default": {"kl": 0.01186525821685791, "cur_lr": 4.999999873689376e-05, "entropy": 7.202608108520508, "total_loss": 14.549711227416992, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10322961211204529, "vf_explained_var": 0.9883344173431396, "vf_loss": 14.640926361083984}, "grad_time_ms": 767.015}, "pid": 3934253, "time_total_s": 60782.12470793724, "episode_reward_mean": -151.53312025163663, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.025584113289, "policy_reward_mean": {}, "episodes_total": 14352, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -140.24453536788127, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-29-42", "training_iteration": 598, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756531782, "episode_len_mean": 50.0, "timesteps_since_restore": 717600, "time_since_restore": 60782.12470793724, "time_this_iter_s": 81.94918751716614, "iterations_since_restore": 598}
+{"timesteps_total": 718800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 89806.981, "num_steps_sampled": 718800, "update_time_ms": 2.689, "num_steps_trained": 718800, "load_time_ms": 0.659, "default": {"kl": 0.013243130408227444, "cur_lr": 4.999999873689376e-05, "entropy": 7.225077152252197, "total_loss": 18.396018981933594, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11471739411354065, "vf_explained_var": 0.9890309572219849, "vf_loss": 18.49732780456543}, "grad_time_ms": 775.626}, "pid": 3934253, "time_total_s": 60867.51846694946, "episode_reward_mean": -151.85888020618955, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.70198157607007, "policy_reward_mean": {}, "episodes_total": 14376, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -147.5689089852922, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-31-07", "training_iteration": 599, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756531867, "episode_len_mean": 50.0, "timesteps_since_restore": 718800, "time_since_restore": 60867.51846694946, "time_this_iter_s": 85.39375901222229, "iterations_since_restore": 599}
+{"timesteps_total": 720000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 91164.442, "num_steps_sampled": 720000, "update_time_ms": 2.654, "num_steps_trained": 720000, "load_time_ms": 0.636, "default": {"kl": 0.01327840518206358, "cur_lr": 4.999999873689376e-05, "entropy": 7.063553810119629, "total_loss": 15.72727108001709, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10396900773048401, "vf_explained_var": 0.9870481491088867, "vf_loss": 15.81779670715332}, "grad_time_ms": 776.607}, "pid": 3934253, "time_total_s": 60990.73926758766, "episode_reward_mean": -151.6618704476836, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.70198157607007, "policy_reward_mean": {}, "episodes_total": 14400, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -147.3681517481075, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-33-10", "training_iteration": 600, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756531990, "episode_len_mean": 50.0, "timesteps_since_restore": 720000, "time_since_restore": 60990.73926758766, "time_this_iter_s": 123.22080063819885, "iterations_since_restore": 600}
+{"timesteps_total": 721200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 91809.072, "num_steps_sampled": 721200, "update_time_ms": 2.679, "num_steps_trained": 721200, "load_time_ms": 0.627, "default": {"kl": 0.011526600457727909, "cur_lr": 4.999999873689376e-05, "entropy": 7.281680583953857, "total_loss": 18.19324493408203, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11207922548055649, "vf_explained_var": 0.9853324890136719, "vf_loss": 18.29365348815918}, "grad_time_ms": 767.64}, "pid": 3934253, "time_total_s": 61098.00409555435, "episode_reward_mean": -151.7562869870905, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.70198157607007, "policy_reward_mean": {}, "episodes_total": 14424, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -147.3681517481075, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-34-58", "training_iteration": 601, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756532098, "episode_len_mean": 50.0, "timesteps_since_restore": 721200, "time_since_restore": 61098.00409555435, "time_this_iter_s": 107.26482796669006, "iterations_since_restore": 601}
+{"timesteps_total": 722400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 90087.989, "num_steps_sampled": 722400, "update_time_ms": 2.721, "num_steps_trained": 722400, "load_time_ms": 0.617, "default": {"kl": 0.01224952470511198, "cur_lr": 4.999999873689376e-05, "entropy": 7.450062274932861, "total_loss": 20.476715087890625, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11732758581638336, "vf_explained_var": 0.9841304421424866, "vf_loss": 20.581642150878906}, "grad_time_ms": 783.345}, "pid": 3934253, "time_total_s": 61192.40952897072, "episode_reward_mean": -152.16321712634695, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.70198157607007, "policy_reward_mean": {}, "episodes_total": 14448, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -147.3681517481075, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-36-32", "training_iteration": 602, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756532192, "episode_len_mean": 50.0, "timesteps_since_restore": 722400, "time_since_restore": 61192.40952897072, "time_this_iter_s": 94.40543341636658, "iterations_since_restore": 602}
+{"timesteps_total": 723600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 90615.528, "num_steps_sampled": 723600, "update_time_ms": 2.768, "num_steps_trained": 723600, "load_time_ms": 0.614, "default": {"kl": 0.015451265498995781, "cur_lr": 4.999999873689376e-05, "entropy": 7.148335933685303, "total_loss": 11.77541732788086, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11515364050865173, "vf_explained_var": 0.9903786182403564, "vf_loss": 11.874926567077637}, "grad_time_ms": 787.36}, "pid": 3934253, "time_total_s": 61280.579090833664, "episode_reward_mean": -152.08872249419608, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.47636487522485, "policy_reward_mean": {}, "episodes_total": 14472, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.45120966923372, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-38-00", "training_iteration": 603, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756532280, "episode_len_mean": 50.0, "timesteps_since_restore": 723600, "time_since_restore": 61280.579090833664, "time_this_iter_s": 88.16956186294556, "iterations_since_restore": 603}
+{"timesteps_total": 724800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 87780.925, "num_steps_sampled": 724800, "update_time_ms": 2.845, "num_steps_trained": 724800, "load_time_ms": 0.615, "default": {"kl": 0.014070438221096992, "cur_lr": 4.999999873689376e-05, "entropy": 7.109999656677246, "total_loss": 10.172300338745117, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12829262018203735, "vf_explained_var": 0.9918432235717773, "vf_loss": 10.286346435546875}, "grad_time_ms": 784.586}, "pid": 3934253, "time_total_s": 61349.66434311867, "episode_reward_mean": -152.43345766828455, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.9578979275503, "policy_reward_mean": {}, "episodes_total": 14496, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.45120966923372, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-39-10", "training_iteration": 604, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756532350, "episode_len_mean": 50.0, "timesteps_since_restore": 724800, "time_since_restore": 61349.66434311867, "time_this_iter_s": 69.08525228500366, "iterations_since_restore": 604}
+{"timesteps_total": 726000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 86887.291, "num_steps_sampled": 726000, "update_time_ms": 2.863, "num_steps_trained": 726000, "load_time_ms": 0.611, "default": {"kl": 0.014225076884031296, "cur_lr": 4.999999873689376e-05, "entropy": 6.7051849365234375, "total_loss": 12.970050811767578, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12973900139331818, "vf_explained_var": 0.9914548397064209, "vf_loss": 13.085387229919434}, "grad_time_ms": 790.49}, "pid": 3934253, "time_total_s": 61424.110256910324, "episode_reward_mean": -152.20568940453862, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.9578979275503, "policy_reward_mean": {}, "episodes_total": 14520, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.36383384103294, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-40-24", "training_iteration": 605, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756532424, "episode_len_mean": 50.0, "timesteps_since_restore": 726000, "time_since_restore": 61424.110256910324, "time_this_iter_s": 74.4459137916565, "iterations_since_restore": 605}
+{"timesteps_total": 727200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 86061.442, "num_steps_sampled": 727200, "update_time_ms": 2.874, "num_steps_trained": 727200, "load_time_ms": 0.614, "default": {"kl": 0.01197892241179943, "cur_lr": 4.999999873689376e-05, "entropy": 7.155910015106201, "total_loss": 16.024038314819336, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12084892392158508, "vf_explained_var": 0.9875774383544922, "vf_loss": 16.13275909423828}, "grad_time_ms": 785.744}, "pid": 3934253, "time_total_s": 61503.27158164978, "episode_reward_mean": -151.8809027765825, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.9578979275503, "policy_reward_mean": {}, "episodes_total": 14544, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.36383384103294, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-41-43", "training_iteration": 606, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756532503, "episode_len_mean": 50.0, "timesteps_since_restore": 727200, "time_since_restore": 61503.27158164978, "time_this_iter_s": 79.16132473945618, "iterations_since_restore": 606}
+{"timesteps_total": 728400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 89229.81, "num_steps_sampled": 728400, "update_time_ms": 2.72, "num_steps_trained": 728400, "load_time_ms": 0.609, "default": {"kl": 0.012574922293424606, "cur_lr": 4.999999873689376e-05, "entropy": 6.90366792678833, "total_loss": 15.371590614318848, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11707106977701187, "vf_explained_var": 0.9888659715652466, "vf_loss": 15.475930213928223}, "grad_time_ms": 786.113}, "pid": 3934253, "time_total_s": 61600.42127537727, "episode_reward_mean": -151.49330662677073, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -167.9578979275503, "policy_reward_mean": {}, "episodes_total": 14568, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.36383384103294, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-43-20", "training_iteration": 607, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756532600, "episode_len_mean": 50.0, "timesteps_since_restore": 728400, "time_since_restore": 61600.42127537727, "time_this_iter_s": 97.14969372749329, "iterations_since_restore": 607}
+{"timesteps_total": 729600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 89682.331, "num_steps_sampled": 729600, "update_time_ms": 2.642, "num_steps_trained": 729600, "load_time_ms": 0.603, "default": {"kl": 0.011613764800131321, "cur_lr": 4.999999873689376e-05, "entropy": 7.039552688598633, "total_loss": 18.735050201416016, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.09857099503278732, "vf_explained_var": 0.9856353402137756, "vf_loss": 18.821861267089844}, "grad_time_ms": 775.753}, "pid": 3934253, "time_total_s": 61686.7905664444, "episode_reward_mean": -151.40655440639014, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.37392545490812, "policy_reward_mean": {}, "episodes_total": 14592, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.36383384103294, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-44-47", "training_iteration": 608, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756532687, "episode_len_mean": 50.0, "timesteps_since_restore": 729600, "time_since_restore": 61686.7905664444, "time_this_iter_s": 86.36929106712341, "iterations_since_restore": 608}
+{"timesteps_total": 730800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92765.788, "num_steps_sampled": 730800, "update_time_ms": 2.676, "num_steps_trained": 730800, "load_time_ms": 0.608, "default": {"kl": 0.01337195560336113, "cur_lr": 4.999999873689376e-05, "entropy": 6.899675369262695, "total_loss": 14.00875186920166, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10345722734928131, "vf_explained_var": 0.9890311360359192, "vf_loss": 14.09867000579834}, "grad_time_ms": 771.632}, "pid": 3934253, "time_total_s": 61802.97908568382, "episode_reward_mean": -151.37933562448453, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.37392545490812, "policy_reward_mean": {}, "episodes_total": 14616, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.66864327226546, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-46-43", "training_iteration": 609, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756532803, "episode_len_mean": 50.0, "timesteps_since_restore": 730800, "time_since_restore": 61802.97908568382, "time_this_iter_s": 116.18851923942566, "iterations_since_restore": 609}
+{"timesteps_total": 732000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 88623.712, "num_steps_sampled": 732000, "update_time_ms": 2.678, "num_steps_trained": 732000, "load_time_ms": 0.608, "default": {"kl": 0.012054665014147758, "cur_lr": 4.999999873689376e-05, "entropy": 6.8780741691589355, "total_loss": 19.48150634765625, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10850019007921219, "vf_explained_var": 0.9851784110069275, "vf_loss": 19.577804565429688}, "grad_time_ms": 774.118}, "pid": 3934253, "time_total_s": 61884.80423927307, "episode_reward_mean": -151.41443393500313, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -161.8105244534275, "policy_reward_mean": {}, "episodes_total": 14640, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.63613739987613, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-48-05", "training_iteration": 610, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756532885, "episode_len_mean": 50.0, "timesteps_since_restore": 732000, "time_since_restore": 61884.80423927307, "time_this_iter_s": 81.82515358924866, "iterations_since_restore": 610}
+{"timesteps_total": 733200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 86950.836, "num_steps_sampled": 733200, "update_time_ms": 2.646, "num_steps_trained": 733200, "load_time_ms": 0.605, "default": {"kl": 0.010721195489168167, "cur_lr": 4.999999873689376e-05, "entropy": 6.638162136077881, "total_loss": 11.81612491607666, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.09991056472063065, "vf_explained_var": 0.9899557828903198, "vf_loss": 11.905179977416992}, "grad_time_ms": 787.336}, "pid": 3934253, "time_total_s": 61975.47169351578, "episode_reward_mean": -151.543765128637, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -161.8105244534275, "policy_reward_mean": {}, "episodes_total": 14664, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.63613739987613, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-49-36", "training_iteration": 611, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756532976, "episode_len_mean": 50.0, "timesteps_since_restore": 733200, "time_since_restore": 61975.47169351578, "time_this_iter_s": 90.6674542427063, "iterations_since_restore": 611}
+{"timesteps_total": 734400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 85627.85, "num_steps_sampled": 734400, "update_time_ms": 2.679, "num_steps_trained": 734400, "load_time_ms": 0.603, "default": {"kl": 0.013192672282457352, "cur_lr": 4.999999873689376e-05, "entropy": 7.051061630249023, "total_loss": 7.227845191955566, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1238882839679718, "vf_explained_var": 0.9936915636062622, "vf_loss": 7.338375568389893}, "grad_time_ms": 784.194}, "pid": 3934253, "time_total_s": 62056.61653780937, "episode_reward_mean": -151.44520802968128, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -161.8105244534275, "policy_reward_mean": {}, "episodes_total": 14688, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.63613739987613, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-50-57", "training_iteration": 612, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756533057, "episode_len_mean": 50.0, "timesteps_since_restore": 734400, "time_since_restore": 62056.61653780937, "time_this_iter_s": 81.14484429359436, "iterations_since_restore": 612}
+{"timesteps_total": 735600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 83882.7, "num_steps_sampled": 735600, "update_time_ms": 2.682, "num_steps_trained": 735600, "load_time_ms": 0.603, "default": {"kl": 0.011554243043065071, "cur_lr": 4.999999873689376e-05, "entropy": 6.660586833953857, "total_loss": 10.637908935546875, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.09981474280357361, "vf_explained_var": 0.9911506175994873, "vf_loss": 10.726024627685547}, "grad_time_ms": 786.349}, "pid": 3934253, "time_total_s": 62127.3542406559, "episode_reward_mean": -151.22924727354598, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -159.87547682683376, "policy_reward_mean": {}, "episodes_total": 14712, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.68615046976356, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-52-07", "training_iteration": 613, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756533127, "episode_len_mean": 50.0, "timesteps_since_restore": 735600, "time_since_restore": 62127.3542406559, "time_this_iter_s": 70.7377028465271, "iterations_since_restore": 613}
+{"timesteps_total": 736800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 85648.444, "num_steps_sampled": 736800, "update_time_ms": 2.603, "num_steps_trained": 736800, "load_time_ms": 0.602, "default": {"kl": 0.013088869862258434, "cur_lr": 4.999999873689376e-05, "entropy": 7.049310207366943, "total_loss": 12.22148609161377, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13127782940864563, "vf_explained_var": 0.990606963634491, "vf_loss": 12.33951187133789}, "grad_time_ms": 792.797}, "pid": 3934253, "time_total_s": 62214.16077184677, "episode_reward_mean": -151.31984968712513, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -162.72055261835348, "policy_reward_mean": {}, "episodes_total": 14736, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.68615046976356, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-53-34", "training_iteration": 614, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756533214, "episode_len_mean": 50.0, "timesteps_since_restore": 736800, "time_since_restore": 62214.16077184677, "time_this_iter_s": 86.80653119087219, "iterations_since_restore": 614}
+{"timesteps_total": 738000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 88803.749, "num_steps_sampled": 738000, "update_time_ms": 2.56, "num_steps_trained": 738000, "load_time_ms": 0.607, "default": {"kl": 0.012645702809095383, "cur_lr": 4.999999873689376e-05, "entropy": 6.928812026977539, "total_loss": 10.566925048828125, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11527398228645325, "vf_explained_var": 0.9913132190704346, "vf_loss": 10.669394493103027}, "grad_time_ms": 785.529}, "pid": 3934253, "time_total_s": 62320.08752441406, "episode_reward_mean": -151.36229052843026, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -162.72055261835348, "policy_reward_mean": {}, "episodes_total": 14760, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.68615046976356, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-55-20", "training_iteration": 615, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756533320, "episode_len_mean": 50.0, "timesteps_since_restore": 738000, "time_since_restore": 62320.08752441406, "time_this_iter_s": 105.92675256729126, "iterations_since_restore": 615}
+{"timesteps_total": 739200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92349.761, "num_steps_sampled": 739200, "update_time_ms": 2.568, "num_steps_trained": 739200, "load_time_ms": 0.632, "default": {"kl": 0.01190970279276371, "cur_lr": 4.999999873689376e-05, "entropy": 7.102513790130615, "total_loss": 9.939801216125488, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11454781144857407, "vf_explained_var": 0.9917420148849487, "vf_loss": 10.042292594909668}, "grad_time_ms": 756.918}, "pid": 3934253, "time_total_s": 62434.422278404236, "episode_reward_mean": -151.60210316670384, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -162.72055261835348, "policy_reward_mean": {}, "episodes_total": 14784, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.68615046976356, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-57-15", "training_iteration": 616, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756533435, "episode_len_mean": 50.0, "timesteps_since_restore": 739200, "time_since_restore": 62434.422278404236, "time_this_iter_s": 114.33475399017334, "iterations_since_restore": 616}
+{"timesteps_total": 740400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 91534.552, "num_steps_sampled": 740400, "update_time_ms": 2.573, "num_steps_trained": 740400, "load_time_ms": 0.633, "default": {"kl": 0.013185751624405384, "cur_lr": 4.999999873689376e-05, "entropy": 6.751632213592529, "total_loss": 10.361977577209473, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1103510633111, "vf_explained_var": 0.9916035532951355, "vf_loss": 10.458977699279785}, "grad_time_ms": 752.487}, "pid": 3934253, "time_total_s": 62523.37660694122, "episode_reward_mean": -151.44191442940803, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -162.72055261835348, "policy_reward_mean": {}, "episodes_total": 14808, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.50587769520746, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_07-58-43", "training_iteration": 617, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756533523, "episode_len_mean": 50.0, "timesteps_since_restore": 740400, "time_since_restore": 62523.37660694122, "time_this_iter_s": 88.9543285369873, "iterations_since_restore": 617}
+{"timesteps_total": 741600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92642.693, "num_steps_sampled": 741600, "update_time_ms": 2.659, "num_steps_trained": 741600, "load_time_ms": 0.637, "default": {"kl": 0.012119187042117119, "cur_lr": 4.999999873689376e-05, "entropy": 6.803467750549316, "total_loss": 8.588187217712402, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10707652568817139, "vf_explained_var": 0.9933609366416931, "vf_loss": 8.682992935180664}, "grad_time_ms": 761.442}, "pid": 3934253, "time_total_s": 62620.91872525215, "episode_reward_mean": -151.5307977259673, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.73135388198196, "policy_reward_mean": {}, "episodes_total": 14832, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.50587769520746, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-00-21", "training_iteration": 618, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756533621, "episode_len_mean": 50.0, "timesteps_since_restore": 741600, "time_since_restore": 62620.91872525215, "time_this_iter_s": 97.54211831092834, "iterations_since_restore": 618}
+{"timesteps_total": 742800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 91459.981, "num_steps_sampled": 742800, "update_time_ms": 2.627, "num_steps_trained": 742800, "load_time_ms": 0.658, "default": {"kl": 0.013423633761703968, "cur_lr": 4.999999873689376e-05, "entropy": 6.8948893547058105, "total_loss": 12.06845474243164, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11208604276180267, "vf_explained_var": 0.9899507164955139, "vf_loss": 12.166949272155762}, "grad_time_ms": 756.378}, "pid": 3934253, "time_total_s": 62725.23010516167, "episode_reward_mean": -151.49945627137046, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.73135388198196, "policy_reward_mean": {}, "episodes_total": 14856, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.50587769520746, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-02-05", "training_iteration": 619, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756533725, "episode_len_mean": 50.0, "timesteps_since_restore": 742800, "time_since_restore": 62725.23010516167, "time_this_iter_s": 104.31137990951538, "iterations_since_restore": 619}
+{"timesteps_total": 744000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93561.719, "num_steps_sampled": 744000, "update_time_ms": 2.654, "num_steps_trained": 744000, "load_time_ms": 0.66, "default": {"kl": 0.01322434563189745, "cur_lr": 4.999999873689376e-05, "entropy": 7.0396013259887695, "total_loss": 13.521455764770508, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11213520169258118, "vf_explained_var": 0.9895057678222656, "vf_loss": 13.62020206451416}, "grad_time_ms": 756.048}, "pid": 3934253, "time_total_s": 62828.070397138596, "episode_reward_mean": -151.12993458929512, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.73135388198196, "policy_reward_mean": {}, "episodes_total": 14880, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.50587769520746, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-03-48", "training_iteration": 620, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756533828, "episode_len_mean": 50.0, "timesteps_since_restore": 744000, "time_since_restore": 62828.070397138596, "time_this_iter_s": 102.84029197692871, "iterations_since_restore": 620}
+{"timesteps_total": 745200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93545.314, "num_steps_sampled": 745200, "update_time_ms": 2.694, "num_steps_trained": 745200, "load_time_ms": 0.663, "default": {"kl": 0.012171603739261627, "cur_lr": 4.999999873689376e-05, "entropy": 6.692158222198486, "total_loss": 6.1855010986328125, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11720164865255356, "vf_explained_var": 0.9949302673339844, "vf_loss": 6.290379047393799}, "grad_time_ms": 752.822}, "pid": 3934253, "time_total_s": 62918.541640520096, "episode_reward_mean": -151.14761090808298, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.73135388198196, "policy_reward_mean": {}, "episodes_total": 14904, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -140.95648442901637, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-05-19", "training_iteration": 621, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756533919, "episode_len_mean": 50.0, "timesteps_since_restore": 745200, "time_since_restore": 62918.541640520096, "time_this_iter_s": 90.47124338150024, "iterations_since_restore": 621}
+{"timesteps_total": 746400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93932.102, "num_steps_sampled": 746400, "update_time_ms": 2.631, "num_steps_trained": 746400, "load_time_ms": 0.661, "default": {"kl": 0.013156522065401077, "cur_lr": 4.999999873689376e-05, "entropy": 6.82674503326416, "total_loss": 14.24813461303711, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11366318166255951, "vf_explained_var": 0.9880519509315491, "vf_loss": 14.34847640991211}, "grad_time_ms": 751.554}, "pid": 3934253, "time_total_s": 63003.54139351845, "episode_reward_mean": -151.18667908639287, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.73135388198196, "policy_reward_mean": {}, "episodes_total": 14928, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.44292993723454, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-06-44", "training_iteration": 622, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756534004, "episode_len_mean": 50.0, "timesteps_since_restore": 746400, "time_since_restore": 63003.54139351845, "time_this_iter_s": 84.99975299835205, "iterations_since_restore": 622}
+{"timesteps_total": 747600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 97851.94, "num_steps_sampled": 747600, "update_time_ms": 2.617, "num_steps_trained": 747600, "load_time_ms": 0.662, "default": {"kl": 0.01315502543002367, "cur_lr": 4.999999873689376e-05, "entropy": 6.802591323852539, "total_loss": 7.619611740112305, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12538450956344604, "vf_explained_var": 0.9940614700317383, "vf_loss": 7.7316765785217285}, "grad_time_ms": 752.627}, "pid": 3934253, "time_total_s": 63113.488800525665, "episode_reward_mean": -151.09440919344684, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.42489643038954, "policy_reward_mean": {}, "episodes_total": 14952, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.44292993723454, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-08-34", "training_iteration": 623, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756534114, "episode_len_mean": 50.0, "timesteps_since_restore": 747600, "time_since_restore": 63113.488800525665, "time_this_iter_s": 109.94740700721741, "iterations_since_restore": 623}
+{"timesteps_total": 748800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 98750.161, "num_steps_sampled": 748800, "update_time_ms": 2.613, "num_steps_trained": 748800, "load_time_ms": 0.668, "default": {"kl": 0.011096199974417686, "cur_lr": 4.999999873689376e-05, "entropy": 6.810266494750977, "total_loss": 11.71121597290039, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11723963171243668, "vf_explained_var": 0.9900689721107483, "vf_loss": 11.817220687866211}, "grad_time_ms": 747.582}, "pid": 3934253, "time_total_s": 63209.22741794586, "episode_reward_mean": -151.22068244121274, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.42489643038954, "policy_reward_mean": {}, "episodes_total": 14976, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.44292993723454, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-10-09", "training_iteration": 624, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756534209, "episode_len_mean": 50.0, "timesteps_since_restore": 748800, "time_since_restore": 63209.22741794586, "time_this_iter_s": 95.73861742019653, "iterations_since_restore": 624}
+{"timesteps_total": 750000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 100088.435, "num_steps_sampled": 750000, "update_time_ms": 2.645, "num_steps_trained": 750000, "load_time_ms": 0.668, "default": {"kl": 0.010650668293237686, "cur_lr": 4.999999873689376e-05, "entropy": 6.672452926635742, "total_loss": 11.082969665527344, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.110077865421772, "vf_explained_var": 0.9915443062782288, "vf_loss": 11.18226432800293}, "grad_time_ms": 757.859}, "pid": 3934253, "time_total_s": 63328.63909459114, "episode_reward_mean": -151.40743659097222, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.42489643038954, "policy_reward_mean": {}, "episodes_total": 15000, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.44292993723454, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-12-09", "training_iteration": 625, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756534329, "episode_len_mean": 50.0, "timesteps_since_restore": 750000, "time_since_restore": 63328.63909459114, "time_this_iter_s": 119.41167664527893, "iterations_since_restore": 625}
+{"timesteps_total": 751200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 100166.948, "num_steps_sampled": 751200, "update_time_ms": 2.642, "num_steps_trained": 751200, "load_time_ms": 0.64, "default": {"kl": 0.010317239910364151, "cur_lr": 4.999999873689376e-05, "entropy": 6.8698625564575195, "total_loss": 23.211606979370117, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10611388087272644, "vf_explained_var": 0.9846295118331909, "vf_loss": 23.307273864746094}, "grad_time_ms": 773.449}, "pid": 3934253, "time_total_s": 63443.91581988335, "episode_reward_mean": -151.66759500747776, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -163.6711908111485, "policy_reward_mean": {}, "episodes_total": 15024, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -144.90419533232387, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-14-04", "training_iteration": 626, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756534444, "episode_len_mean": 50.0, "timesteps_since_restore": 751200, "time_since_restore": 63443.91581988335, "time_this_iter_s": 115.27672529220581, "iterations_since_restore": 626}
+{"timesteps_total": 752400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 103369.137, "num_steps_sampled": 752400, "update_time_ms": 2.692, "num_steps_trained": 752400, "load_time_ms": 0.631, "default": {"kl": 0.013233959674835205, "cur_lr": 4.999999873689376e-05, "entropy": 6.71226692199707, "total_loss": 15.275715827941895, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12592655420303345, "vf_explained_var": 0.989207923412323, "vf_loss": 15.38824462890625}, "grad_time_ms": 744.545}, "pid": 3934253, "time_total_s": 63564.602367162704, "episode_reward_mean": -151.4929936336819, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -163.6711908111485, "policy_reward_mean": {}, "episodes_total": 15048, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.5958818274101, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-16-05", "training_iteration": 627, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756534565, "episode_len_mean": 50.0, "timesteps_since_restore": 752400, "time_since_restore": 63564.602367162704, "time_this_iter_s": 120.68654727935791, "iterations_since_restore": 627}
+{"timesteps_total": 753600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 102544.866, "num_steps_sampled": 753600, "update_time_ms": 2.595, "num_steps_trained": 753600, "load_time_ms": 0.631, "default": {"kl": 0.012792283669114113, "cur_lr": 4.999999873689376e-05, "entropy": 6.9640302658081055, "total_loss": 19.60162925720215, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11187508702278137, "vf_explained_var": 0.9851052761077881, "vf_loss": 19.700551986694336}, "grad_time_ms": 736.195}, "pid": 3934253, "time_total_s": 63653.81639122963, "episode_reward_mean": -151.669466943407, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.4776514019978, "policy_reward_mean": {}, "episodes_total": 15072, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.5958818274101, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-17-34", "training_iteration": 628, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756534654, "episode_len_mean": 50.0, "timesteps_since_restore": 753600, "time_since_restore": 63653.81639122963, "time_this_iter_s": 89.21402406692505, "iterations_since_restore": 628}
+{"timesteps_total": 754800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 101229.522, "num_steps_sampled": 754800, "update_time_ms": 2.535, "num_steps_trained": 754800, "load_time_ms": 0.607, "default": {"kl": 0.010650486685335636, "cur_lr": 4.999999873689376e-05, "entropy": 6.8045148849487305, "total_loss": 13.200957298278809, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12197298556566238, "vf_explained_var": 0.9894353151321411, "vf_loss": 13.312145233154297}, "grad_time_ms": 745.2}, "pid": 3934253, "time_total_s": 63745.061317682266, "episode_reward_mean": -151.63851588055306, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.4776514019978, "policy_reward_mean": {}, "episodes_total": 15096, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.5958818274101, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-19-05", "training_iteration": 629, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756534745, "episode_len_mean": 50.0, "timesteps_since_restore": 754800, "time_since_restore": 63745.061317682266, "time_this_iter_s": 91.24492645263672, "iterations_since_restore": 629}
+{"timesteps_total": 756000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 100842.923, "num_steps_sampled": 756000, "update_time_ms": 2.551, "num_steps_trained": 756000, "load_time_ms": 0.609, "default": {"kl": 0.010836427100002766, "cur_lr": 4.999999873689376e-05, "entropy": 6.896514415740967, "total_loss": 20.006378173828125, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11461702734231949, "vf_explained_var": 0.9843152165412903, "vf_loss": 20.110023498535156}, "grad_time_ms": 724.066}, "pid": 3934253, "time_total_s": 63843.82303214073, "episode_reward_mean": -151.58272379672468, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.4776514019978, "policy_reward_mean": {}, "episodes_total": 15120, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.5958818274101, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-20-44", "training_iteration": 630, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756534844, "episode_len_mean": 50.0, "timesteps_since_restore": 756000, "time_since_restore": 63843.82303214073, "time_this_iter_s": 98.76171445846558, "iterations_since_restore": 630}
+{"timesteps_total": 757200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 101480.276, "num_steps_sampled": 757200, "update_time_ms": 2.489, "num_steps_trained": 757200, "load_time_ms": 0.61, "default": {"kl": 0.012623208574950695, "cur_lr": 4.999999873689376e-05, "entropy": 6.808897972106934, "total_loss": 15.05218505859375, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10825130343437195, "vf_explained_var": 0.9871785640716553, "vf_loss": 15.147655487060547}, "grad_time_ms": 723.758}, "pid": 3934253, "time_total_s": 63940.66364145279, "episode_reward_mean": -151.53546933288317, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.4776514019978, "policy_reward_mean": {}, "episodes_total": 15144, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.66980860902888, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-22-21", "training_iteration": 631, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756534941, "episode_len_mean": 50.0, "timesteps_since_restore": 757200, "time_since_restore": 63940.66364145279, "time_this_iter_s": 96.8406093120575, "iterations_since_restore": 631}
+{"timesteps_total": 758400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 100884.692, "num_steps_sampled": 758400, "update_time_ms": 2.45, "num_steps_trained": 758400, "load_time_ms": 0.64, "default": {"kl": 0.01185892429202795, "cur_lr": 4.999999873689376e-05, "entropy": 6.723665714263916, "total_loss": 11.030750274658203, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10723483562469482, "vf_explained_var": 0.9907653331756592, "vf_loss": 11.125978469848633}, "grad_time_ms": 717.742}, "pid": 3934253, "time_total_s": 64019.64652919769, "episode_reward_mean": -151.36588981547644, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -158.80067592354442, "policy_reward_mean": {}, "episodes_total": 15168, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.66980860902888, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-23-40", "training_iteration": 632, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756535020, "episode_len_mean": 50.0, "timesteps_since_restore": 758400, "time_since_restore": 64019.64652919769, "time_this_iter_s": 78.98288774490356, "iterations_since_restore": 632}
+{"timesteps_total": 759600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 98671.08, "num_steps_sampled": 759600, "update_time_ms": 2.454, "num_steps_trained": 759600, "load_time_ms": 0.646, "default": {"kl": 0.012938495725393295, "cur_lr": 4.999999873689376e-05, "entropy": 6.659440994262695, "total_loss": 13.684261322021484, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11048747599124908, "vf_explained_var": 0.9885939359664917, "vf_loss": 13.781648635864258}, "grad_time_ms": 709.359}, "pid": 3934253, "time_total_s": 64107.37429046631, "episode_reward_mean": -151.65608875992902, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.02453192680858, "policy_reward_mean": {}, "episodes_total": 15192, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.51797225071994, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-25-08", "training_iteration": 633, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756535108, "episode_len_mean": 50.0, "timesteps_since_restore": 759600, "time_since_restore": 64107.37429046631, "time_this_iter_s": 87.72776126861572, "iterations_since_restore": 633}
+{"timesteps_total": 760800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 98553.06, "num_steps_sampled": 760800, "update_time_ms": 2.495, "num_steps_trained": 760800, "load_time_ms": 0.636, "default": {"kl": 0.012192122638225555, "cur_lr": 4.999999873689376e-05, "entropy": 6.7756500244140625, "total_loss": 8.989873886108398, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11869990825653076, "vf_explained_var": 0.9927349090576172, "vf_loss": 9.096230506896973}, "grad_time_ms": 714.761}, "pid": 3934253, "time_total_s": 64201.98797130585, "episode_reward_mean": -151.59236133085568, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.02453192680858, "policy_reward_mean": {}, "episodes_total": 15216, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -147.74976120463958, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-26-42", "training_iteration": 634, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756535202, "episode_len_mean": 50.0, "timesteps_since_restore": 760800, "time_since_restore": 64201.98797130585, "time_this_iter_s": 94.61368083953857, "iterations_since_restore": 634}
+{"timesteps_total": 762000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 99148.287, "num_steps_sampled": 762000, "update_time_ms": 2.504, "num_steps_trained": 762000, "load_time_ms": 0.633, "default": {"kl": 0.012157265096902847, "cur_lr": 4.999999873689376e-05, "entropy": 6.482801914215088, "total_loss": 9.540739059448242, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11832654476165771, "vf_explained_var": 0.9925000071525574, "vf_loss": 9.646757125854492}, "grad_time_ms": 704.804}, "pid": 3934253, "time_total_s": 64327.25306916237, "episode_reward_mean": -151.5300366605358, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.02453192680858, "policy_reward_mean": {}, "episodes_total": 15240, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -145.57923844267256, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-28-48", "training_iteration": 635, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756535328, "episode_len_mean": 50.0, "timesteps_since_restore": 762000, "time_since_restore": 64327.25306916237, "time_this_iter_s": 125.2650978565216, "iterations_since_restore": 635}
+{"timesteps_total": 763200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94685.493, "num_steps_sampled": 763200, "update_time_ms": 2.451, "num_steps_trained": 763200, "load_time_ms": 0.639, "default": {"kl": 0.011522796005010605, "cur_lr": 4.999999873689376e-05, "entropy": 6.750631332397461, "total_loss": 14.481304168701172, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11807700991630554, "vf_explained_var": 0.9886033535003662, "vf_loss": 14.587714195251465}, "grad_time_ms": 725.526}, "pid": 3934253, "time_total_s": 64398.10787272453, "episode_reward_mean": -151.58026426985145, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.86872080682477, "policy_reward_mean": {}, "episodes_total": 15264, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -135.66465642046649, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-29-59", "training_iteration": 636, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756535399, "episode_len_mean": 50.0, "timesteps_since_restore": 763200, "time_since_restore": 64398.10787272453, "time_this_iter_s": 70.8548035621643, "iterations_since_restore": 636}
+{"timesteps_total": 764400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 91776.876, "num_steps_sampled": 764400, "update_time_ms": 2.38, "num_steps_trained": 764400, "load_time_ms": 0.647, "default": {"kl": 0.012320362962782383, "cur_lr": 4.999999873689376e-05, "entropy": 6.598195552825928, "total_loss": 13.947826385498047, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10195771604776382, "vf_explained_var": 0.9885042905807495, "vf_loss": 14.037308692932129}, "grad_time_ms": 759.452}, "pid": 3934253, "time_total_s": 64490.04764533043, "episode_reward_mean": -151.43070890493792, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.86872080682477, "policy_reward_mean": {}, "episodes_total": 15288, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -135.66465642046649, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-31-30", "training_iteration": 637, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756535490, "episode_len_mean": 50.0, "timesteps_since_restore": 764400, "time_since_restore": 64490.04764533043, "time_this_iter_s": 91.939772605896, "iterations_since_restore": 637}
+{"timesteps_total": 765600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94764.538, "num_steps_sampled": 765600, "update_time_ms": 2.38, "num_steps_trained": 765600, "load_time_ms": 0.642, "default": {"kl": 0.01230735331773758, "cur_lr": 4.999999873689376e-05, "entropy": 6.373239040374756, "total_loss": 10.451953887939453, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11258859187364578, "vf_explained_var": 0.991771399974823, "vf_loss": 10.552081108093262}, "grad_time_ms": 756.828}, "pid": 3934253, "time_total_s": 64609.111683130264, "episode_reward_mean": -151.24051930669168, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.86872080682477, "policy_reward_mean": {}, "episodes_total": 15312, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -135.66465642046649, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-33-30", "training_iteration": 638, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756535610, "episode_len_mean": 50.0, "timesteps_since_restore": 765600, "time_since_restore": 64609.111683130264, "time_this_iter_s": 119.0640377998352, "iterations_since_restore": 638}
+{"timesteps_total": 766800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94182.613, "num_steps_sampled": 766800, "update_time_ms": 2.45, "num_steps_trained": 766800, "load_time_ms": 0.644, "default": {"kl": 0.01306148525327444, "cur_lr": 4.999999873689376e-05, "entropy": 6.652033805847168, "total_loss": 11.028017044067383, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11691049486398697, "vf_explained_var": 0.9918647408485413, "vf_loss": 11.131702423095703}, "grad_time_ms": 757.641}, "pid": 3934253, "time_total_s": 64694.54643154144, "episode_reward_mean": -151.55331459979587, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.86872080682477, "policy_reward_mean": {}, "episodes_total": 15336, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -135.66465642046649, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-34-55", "training_iteration": 639, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756535695, "episode_len_mean": 50.0, "timesteps_since_restore": 766800, "time_since_restore": 64694.54643154144, "time_this_iter_s": 85.43474841117859, "iterations_since_restore": 639}
+{"timesteps_total": 768000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93906.791, "num_steps_sampled": 768000, "update_time_ms": 2.421, "num_steps_trained": 768000, "load_time_ms": 0.637, "default": {"kl": 0.01332173403352499, "cur_lr": 4.999999873689376e-05, "entropy": 6.823115825653076, "total_loss": 10.166665077209473, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12121745198965073, "vf_explained_var": 0.9914849400520325, "vf_loss": 10.274394989013672}, "grad_time_ms": 780.376}, "pid": 3934253, "time_total_s": 64790.77740550041, "episode_reward_mean": -151.35041920654274, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -163.88208498966546, "policy_reward_mean": {}, "episodes_total": 15360, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.95728698257986, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-36-31", "training_iteration": 640, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756535791, "episode_len_mean": 50.0, "timesteps_since_restore": 768000, "time_since_restore": 64790.77740550041, "time_this_iter_s": 96.23097395896912, "iterations_since_restore": 640}
+{"timesteps_total": 769200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 96050.788, "num_steps_sampled": 769200, "update_time_ms": 2.539, "num_steps_trained": 769200, "load_time_ms": 0.635, "default": {"kl": 0.012392531149089336, "cur_lr": 4.999999873689376e-05, "entropy": 6.514824390411377, "total_loss": 7.781041622161865, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12088058888912201, "vf_explained_var": 0.9940726161003113, "vf_loss": 7.889374732971191}, "grad_time_ms": 778.119}, "pid": 3934253, "time_total_s": 64909.03731918335, "episode_reward_mean": -150.96115774447182, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -163.88208498966546, "policy_reward_mean": {}, "episodes_total": 15384, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.95728698257986, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-38-30", "training_iteration": 641, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756535910, "episode_len_mean": 50.0, "timesteps_since_restore": 769200, "time_since_restore": 64909.03731918335, "time_this_iter_s": 118.25991368293762, "iterations_since_restore": 641}
+{"timesteps_total": 770400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 99890.034, "num_steps_sampled": 770400, "update_time_ms": 2.583, "num_steps_trained": 770400, "load_time_ms": 0.598, "default": {"kl": 0.013650444336235523, "cur_lr": 4.999999873689376e-05, "entropy": 6.526227951049805, "total_loss": 17.587209701538086, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10258938372135162, "vf_explained_var": 0.9885136485099792, "vf_loss": 17.67597770690918}, "grad_time_ms": 782.955}, "pid": 3934253, "time_total_s": 65026.46193361282, "episode_reward_mean": -151.0723561949242, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -163.88208498966546, "policy_reward_mean": {}, "episodes_total": 15408, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.86892682381847, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-40-27", "training_iteration": 642, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756536027, "episode_len_mean": 50.0, "timesteps_since_restore": 770400, "time_since_restore": 65026.46193361282, "time_this_iter_s": 117.42461442947388, "iterations_since_restore": 642}
+{"timesteps_total": 771600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 101276.207, "num_steps_sampled": 771600, "update_time_ms": 2.602, "num_steps_trained": 771600, "load_time_ms": 0.601, "default": {"kl": 0.013796964660286903, "cur_lr": 4.999999873689376e-05, "entropy": 6.629274368286133, "total_loss": 8.684115409851074, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12487681955099106, "vf_explained_var": 0.9934294819831848, "vf_loss": 8.795022964477539}, "grad_time_ms": 778.614}, "pid": 3934253, "time_total_s": 65128.00844717026, "episode_reward_mean": -151.140397750438, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.09382066300637, "policy_reward_mean": {}, "episodes_total": 15432, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.67803283239894, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-42-09", "training_iteration": 643, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756536129, "episode_len_mean": 50.0, "timesteps_since_restore": 771600, "time_since_restore": 65128.00844717026, "time_this_iter_s": 101.54651355743408, "iterations_since_restore": 643}
+{"timesteps_total": 772800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 98711.301, "num_steps_sampled": 772800, "update_time_ms": 2.525, "num_steps_trained": 772800, "load_time_ms": 0.606, "default": {"kl": 0.011933304369449615, "cur_lr": 4.999999873689376e-05, "entropy": 6.656396865844727, "total_loss": 10.540224075317383, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12175066024065018, "vf_explained_var": 0.9919617176055908, "vf_loss": 10.649892807006836}, "grad_time_ms": 774.185}, "pid": 3934253, "time_total_s": 65196.92683053017, "episode_reward_mean": -151.43561850808175, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.09382066300637, "policy_reward_mean": {}, "episodes_total": 15456, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.67803283239894, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-43-17", "training_iteration": 644, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756536197, "episode_len_mean": 50.0, "timesteps_since_restore": 772800, "time_since_restore": 65196.92683053017, "time_this_iter_s": 68.91838335990906, "iterations_since_restore": 644}
+{"timesteps_total": 774000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94945.91, "num_steps_sampled": 774000, "update_time_ms": 2.5, "num_steps_trained": 774000, "load_time_ms": 0.622, "default": {"kl": 0.012951802462339401, "cur_lr": 4.999999873689376e-05, "entropy": 6.6081223487854, "total_loss": 11.552346229553223, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12751348316669464, "vf_explained_var": 0.9913455247879028, "vf_loss": 11.66674518585205}, "grad_time_ms": 776.546}, "pid": 3934253, "time_total_s": 65284.56107521057, "episode_reward_mean": -151.55425962251707, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.09382066300637, "policy_reward_mean": {}, "episodes_total": 15480, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.4596296357344, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-44-45", "training_iteration": 645, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756536285, "episode_len_mean": 50.0, "timesteps_since_restore": 774000, "time_since_restore": 65284.56107521057, "time_this_iter_s": 87.63424468040466, "iterations_since_restore": 645}
+{"timesteps_total": 775200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 97436.799, "num_steps_sampled": 775200, "update_time_ms": 2.503, "num_steps_trained": 775200, "load_time_ms": 0.616, "default": {"kl": 0.01226724311709404, "cur_lr": 4.999999873689376e-05, "entropy": 6.506907939910889, "total_loss": 11.610782623291016, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11629611998796463, "vf_explained_var": 0.990890622138977, "vf_loss": 11.7146577835083}, "grad_time_ms": 776.118}, "pid": 3934253, "time_total_s": 65380.31948065758, "episode_reward_mean": -151.60373988103257, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.09382066300637, "policy_reward_mean": {}, "episodes_total": 15504, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.4596296357344, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-46-21", "training_iteration": 646, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756536381, "episode_len_mean": 50.0, "timesteps_since_restore": 775200, "time_since_restore": 65380.31948065758, "time_this_iter_s": 95.75840544700623, "iterations_since_restore": 646}
+{"timesteps_total": 776400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 98575.832, "num_steps_sampled": 776400, "update_time_ms": 2.522, "num_steps_trained": 776400, "load_time_ms": 0.612, "default": {"kl": 0.013378623872995377, "cur_lr": 4.999999873689376e-05, "entropy": 6.336060047149658, "total_loss": 6.171751022338867, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11012449115514755, "vf_explained_var": 0.9950565695762634, "vf_loss": 6.268329620361328}, "grad_time_ms": 770.747}, "pid": 3934253, "time_total_s": 65483.59596991539, "episode_reward_mean": -151.69143272461713, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.05178551685933, "policy_reward_mean": {}, "episodes_total": 15528, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.4596296357344, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-48-04", "training_iteration": 647, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756536484, "episode_len_mean": 50.0, "timesteps_since_restore": 776400, "time_since_restore": 65483.59596991539, "time_this_iter_s": 103.2764892578125, "iterations_since_restore": 647}
+{"timesteps_total": 777600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 98555.02, "num_steps_sampled": 777600, "update_time_ms": 2.559, "num_steps_trained": 777600, "load_time_ms": 0.62, "default": {"kl": 0.012983070686459541, "cur_lr": 4.999999873689376e-05, "entropy": 6.5856804847717285, "total_loss": 7.57802677154541, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11344198137521744, "vf_explained_var": 0.9940193891525269, "vf_loss": 7.678323745727539}, "grad_time_ms": 779.093}, "pid": 3934253, "time_total_s": 65602.53673911095, "episode_reward_mean": -151.45275960946202, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.05178551685933, "policy_reward_mean": {}, "episodes_total": 15552, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.4596296357344, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-50-03", "training_iteration": 648, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756536603, "episode_len_mean": 50.0, "timesteps_since_restore": 777600, "time_since_restore": 65602.53673911095, "time_this_iter_s": 118.94076919555664, "iterations_since_restore": 648}
+{"timesteps_total": 778800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 99848.798, "num_steps_sampled": 778800, "update_time_ms": 2.549, "num_steps_trained": 778800, "load_time_ms": 0.617, "default": {"kl": 0.011338386684656143, "cur_lr": 4.999999873689376e-05, "entropy": 6.613151550292969, "total_loss": 12.30356216430664, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10434151440858841, "vf_explained_var": 0.9906575679779053, "vf_loss": 12.39642333984375}, "grad_time_ms": 763.442}, "pid": 3934253, "time_total_s": 65700.7525241375, "episode_reward_mean": -151.33954524857702, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.05178551685933, "policy_reward_mean": {}, "episodes_total": 15576, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.78754009526514, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-51-41", "training_iteration": 649, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756536701, "episode_len_mean": 50.0, "timesteps_since_restore": 778800, "time_since_restore": 65700.7525241375, "time_this_iter_s": 98.2157850265503, "iterations_since_restore": 649}
+{"timesteps_total": 780000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 100781.345, "num_steps_sampled": 780000, "update_time_ms": 2.527, "num_steps_trained": 780000, "load_time_ms": 0.622, "default": {"kl": 0.013383557088673115, "cur_lr": 4.999999873689376e-05, "entropy": 6.252384185791016, "total_loss": 9.627680778503418, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11854615807533264, "vf_explained_var": 0.9921794533729553, "vf_loss": 9.732675552368164}, "grad_time_ms": 765.889}, "pid": 3934253, "time_total_s": 65806.33282995224, "episode_reward_mean": -151.20732308144824, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.05178551685933, "policy_reward_mean": {}, "episodes_total": 15600, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.78754009526514, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-53-27", "training_iteration": 650, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756536807, "episode_len_mean": 50.0, "timesteps_since_restore": 780000, "time_since_restore": 65806.33282995224, "time_this_iter_s": 105.58030581474304, "iterations_since_restore": 650}
+{"timesteps_total": 781200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 98930.099, "num_steps_sampled": 781200, "update_time_ms": 2.402, "num_steps_trained": 781200, "load_time_ms": 0.621, "default": {"kl": 0.012757916003465652, "cur_lr": 4.999999873689376e-05, "entropy": 6.42793083190918, "total_loss": 10.180928230285645, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1097208559513092, "vf_explained_var": 0.99164879322052, "vf_loss": 10.277731895446777}, "grad_time_ms": 763.95}, "pid": 3934253, "time_total_s": 65906.05948472023, "episode_reward_mean": -150.76600787910772, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -161.78974099861574, "policy_reward_mean": {}, "episodes_total": 15624, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.78754009526514, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-55-07", "training_iteration": 651, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756536907, "episode_len_mean": 50.0, "timesteps_since_restore": 781200, "time_since_restore": 65906.05948472023, "time_this_iter_s": 99.72665476799011, "iterations_since_restore": 651}
+{"timesteps_total": 782400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95713.624, "num_steps_sampled": 782400, "update_time_ms": 2.422, "num_steps_trained": 782400, "load_time_ms": 0.626, "default": {"kl": 0.011412886902689934, "cur_lr": 4.999999873689376e-05, "entropy": 6.393667221069336, "total_loss": 19.408985137939453, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11556919664144516, "vf_explained_var": 0.9846268892288208, "vf_loss": 19.512996673583984}, "grad_time_ms": 767.191}, "pid": 3934253, "time_total_s": 65991.35186958313, "episode_reward_mean": -151.0416711676692, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -162.9573703906747, "policy_reward_mean": {}, "episodes_total": 15648, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.75696429563217, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-56-32", "training_iteration": 652, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756536992, "episode_len_mean": 50.0, "timesteps_since_restore": 782400, "time_since_restore": 65991.35186958313, "time_this_iter_s": 85.29238486289978, "iterations_since_restore": 652}
+{"timesteps_total": 783600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94709.237, "num_steps_sampled": 783600, "update_time_ms": 2.422, "num_steps_trained": 783600, "load_time_ms": 0.624, "default": {"kl": 0.012586956843733788, "cur_lr": 4.999999873689376e-05, "entropy": 6.694999694824219, "total_loss": 8.255680084228516, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12337406724691391, "vf_explained_var": 0.9931978583335876, "vf_loss": 8.36630916595459}, "grad_time_ms": 776.443}, "pid": 3934253, "time_total_s": 66082.94680023193, "episode_reward_mean": -151.26611398768958, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.28386676227794, "policy_reward_mean": {}, "episodes_total": 15672, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.75696429563217, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-58-04", "training_iteration": 653, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756537084, "episode_len_mean": 50.0, "timesteps_since_restore": 783600, "time_since_restore": 66082.94680023193, "time_this_iter_s": 91.59493064880371, "iterations_since_restore": 653}
+{"timesteps_total": 784800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93480.09, "num_steps_sampled": 784800, "update_time_ms": 2.477, "num_steps_trained": 784800, "load_time_ms": 0.63, "default": {"kl": 0.011648065410554409, "cur_lr": 4.999999873689376e-05, "entropy": 6.439423561096191, "total_loss": 6.595895290374756, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1131308376789093, "vf_explained_var": 0.9945122599601746, "vf_loss": 6.697232723236084}, "grad_time_ms": 782.817}, "pid": 3934253, "time_total_s": 66139.63784337044, "episode_reward_mean": -151.421835252641, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.17811680513293, "policy_reward_mean": {}, "episodes_total": 15696, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.75696429563217, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_08-59-00", "training_iteration": 654, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756537140, "episode_len_mean": 50.0, "timesteps_since_restore": 784800, "time_since_restore": 66139.63784337044, "time_this_iter_s": 56.69104313850403, "iterations_since_restore": 654}
+{"timesteps_total": 786000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93421.758, "num_steps_sampled": 786000, "update_time_ms": 2.448, "num_steps_trained": 786000, "load_time_ms": 0.617, "default": {"kl": 0.012986731715500355, "cur_lr": 4.999999873689376e-05, "entropy": 6.728307723999023, "total_loss": 12.930699348449707, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12061098217964172, "vf_explained_var": 0.9898444414138794, "vf_loss": 13.038162231445312}, "grad_time_ms": 757.218}, "pid": 3934253, "time_total_s": 66226.43191671371, "episode_reward_mean": -151.55709439030414, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.17811680513293, "policy_reward_mean": {}, "episodes_total": 15720, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.89106674327246, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-00-27", "training_iteration": 655, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756537227, "episode_len_mean": 50.0, "timesteps_since_restore": 786000, "time_since_restore": 66226.43191671371, "time_this_iter_s": 86.79407334327698, "iterations_since_restore": 655}
+{"timesteps_total": 787200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 91060.581, "num_steps_sampled": 787200, "update_time_ms": 2.515, "num_steps_trained": 787200, "load_time_ms": 0.615, "default": {"kl": 0.012636389583349228, "cur_lr": 4.999999873689376e-05, "entropy": 6.8368353843688965, "total_loss": 14.384733200073242, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13676336407661438, "vf_explained_var": 0.9892227649688721, "vf_loss": 14.50870132446289}, "grad_time_ms": 754.775}, "pid": 3934253, "time_total_s": 66298.55557537079, "episode_reward_mean": -151.40458382084128, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.17811680513293, "policy_reward_mean": {}, "episodes_total": 15744, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.54462597260832, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-01-39", "training_iteration": 656, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756537299, "episode_len_mean": 50.0, "timesteps_since_restore": 787200, "time_since_restore": 66298.55557537079, "time_this_iter_s": 72.12365865707397, "iterations_since_restore": 656}
+{"timesteps_total": 788400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 86574.116, "num_steps_sampled": 788400, "update_time_ms": 2.507, "num_steps_trained": 788400, "load_time_ms": 0.619, "default": {"kl": 0.013919010758399963, "cur_lr": 4.999999873689376e-05, "entropy": 6.683310508728027, "total_loss": 8.733895301818848, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11779823899269104, "vf_explained_var": 0.992950439453125, "vf_loss": 8.837601661682129}, "grad_time_ms": 761.237}, "pid": 3934253, "time_total_s": 66357.03185558319, "episode_reward_mean": -151.25175747424433, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -166.17811680513293, "policy_reward_mean": {}, "episodes_total": 15768, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.54462597260832, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-02-38", "training_iteration": 657, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756537358, "episode_len_mean": 50.0, "timesteps_since_restore": 788400, "time_since_restore": 66357.03185558319, "time_this_iter_s": 58.476280212402344, "iterations_since_restore": 657}
+{"timesteps_total": 789600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 82118.274, "num_steps_sampled": 789600, "update_time_ms": 2.47, "num_steps_trained": 789600, "load_time_ms": 0.616, "default": {"kl": 0.012269611470401287, "cur_lr": 4.999999873689376e-05, "entropy": 6.552437782287598, "total_loss": 12.102431297302246, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1195855513215065, "vf_explained_var": 0.990055501461029, "vf_loss": 12.209592819213867}, "grad_time_ms": 766.057}, "pid": 3934253, "time_total_s": 66431.46171355247, "episode_reward_mean": -151.0359636759865, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -163.4725483196033, "policy_reward_mean": {}, "episodes_total": 15792, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.54462597260832, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-03-52", "training_iteration": 658, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756537432, "episode_len_mean": 50.0, "timesteps_since_restore": 789600, "time_since_restore": 66431.46171355247, "time_this_iter_s": 74.42985796928406, "iterations_since_restore": 658}
+{"timesteps_total": 790800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 83903.982, "num_steps_sampled": 790800, "update_time_ms": 2.472, "num_steps_trained": 790800, "load_time_ms": 0.62, "default": {"kl": 0.007937086746096611, "cur_lr": 4.999999873689376e-05, "entropy": 6.613508701324463, "total_loss": 45.66404724121094, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1019890308380127, "vf_explained_var": 0.9751158952713013, "vf_loss": 45.757999420166016}, "grad_time_ms": 774.19}, "pid": 3934253, "time_total_s": 66547.61732769012, "episode_reward_mean": -151.75462206130666, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -209.2673208160466, "policy_reward_mean": {}, "episodes_total": 15816, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.54462597260832, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-05-48", "training_iteration": 659, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756537548, "episode_len_mean": 50.0, "timesteps_since_restore": 790800, "time_since_restore": 66547.61732769012, "time_this_iter_s": 116.15561413764954, "iterations_since_restore": 659}
+{"timesteps_total": 792000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 82488.539, "num_steps_sampled": 792000, "update_time_ms": 2.46, "num_steps_trained": 792000, "load_time_ms": 0.622, "default": {"kl": 0.012799741700291634, "cur_lr": 4.999999873689376e-05, "entropy": 6.50419807434082, "total_loss": 8.887038230895996, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1166180744767189, "vf_explained_var": 0.9929201602935791, "vf_loss": 8.990696907043457}, "grad_time_ms": 780.299}, "pid": 3934253, "time_total_s": 66639.10441493988, "episode_reward_mean": -152.13930837402907, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -209.2673208160466, "policy_reward_mean": {}, "episodes_total": 15840, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.75838048415804, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-07-20", "training_iteration": 660, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756537640, "episode_len_mean": 50.0, "timesteps_since_restore": 792000, "time_since_restore": 66639.10441493988, "time_this_iter_s": 91.48708724975586, "iterations_since_restore": 660}
+{"timesteps_total": 793200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 81544.232, "num_steps_sampled": 793200, "update_time_ms": 2.483, "num_steps_trained": 793200, "load_time_ms": 0.623, "default": {"kl": 0.013526301831007004, "cur_lr": 4.999999873689376e-05, "entropy": 6.520646572113037, "total_loss": 6.78563117980957, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1247837170958519, "vf_explained_var": 0.9944746494293213, "vf_loss": 6.896719455718994}, "grad_time_ms": 787.181}, "pid": 3934253, "time_total_s": 66729.45693945885, "episode_reward_mean": -152.25459962815407, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -209.2673208160466, "policy_reward_mean": {}, "episodes_total": 15864, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.75838048415804, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-08-50", "training_iteration": 661, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756537730, "episode_len_mean": 50.0, "timesteps_since_restore": 793200, "time_since_restore": 66729.45693945885, "time_this_iter_s": 90.35252451896667, "iterations_since_restore": 661}
+{"timesteps_total": 794400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 81566.335, "num_steps_sampled": 794400, "update_time_ms": 2.481, "num_steps_trained": 794400, "load_time_ms": 0.624, "default": {"kl": 0.009709502570331097, "cur_lr": 4.999999873689376e-05, "entropy": 6.697580814361572, "total_loss": 31.00704574584961, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10691169649362564, "vf_explained_var": 0.9766644239425659, "vf_loss": 31.1041259765625}, "grad_time_ms": 784.331}, "pid": 3934253, "time_total_s": 66814.94206523895, "episode_reward_mean": -152.15130963223993, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -209.2673208160466, "policy_reward_mean": {}, "episodes_total": 15888, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.75838048415804, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-10-16", "training_iteration": 662, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756537816, "episode_len_mean": 50.0, "timesteps_since_restore": 794400, "time_since_restore": 66814.94206523895, "time_this_iter_s": 85.48512578010559, "iterations_since_restore": 662}
+{"timesteps_total": 795600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 81604.465, "num_steps_sampled": 795600, "update_time_ms": 2.449, "num_steps_trained": 795600, "load_time_ms": 0.63, "default": {"kl": 0.011459432542324066, "cur_lr": 4.999999873689376e-05, "entropy": 6.440579891204834, "total_loss": 9.810572624206543, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11177754402160645, "vf_explained_var": 0.9926278591156006, "vf_loss": 9.910746574401855}, "grad_time_ms": 753.241}, "pid": 3934253, "time_total_s": 66906.60725140572, "episode_reward_mean": -151.43457264234567, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.53212164937042, "policy_reward_mean": {}, "episodes_total": 15912, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.75838048415804, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-11-47", "training_iteration": 663, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756537907, "episode_len_mean": 50.0, "timesteps_since_restore": 795600, "time_since_restore": 66906.60725140572, "time_this_iter_s": 91.6651861667633, "iterations_since_restore": 663}
+{"timesteps_total": 796800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 85209.039, "num_steps_sampled": 796800, "update_time_ms": 2.44, "num_steps_trained": 796800, "load_time_ms": 0.62, "default": {"kl": 0.014888007193803787, "cur_lr": 4.999999873689376e-05, "entropy": 6.557436466217041, "total_loss": 7.459188461303711, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1355879008769989, "vf_explained_var": 0.994841992855072, "vf_loss": 7.579701900482178}, "grad_time_ms": 754.921}, "pid": 3934253, "time_total_s": 66999.36083936691, "episode_reward_mean": -151.34236627439955, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.53212164937042, "policy_reward_mean": {}, "episodes_total": 15936, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.46980255395553, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-13-20", "training_iteration": 664, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756538000, "episode_len_mean": 50.0, "timesteps_since_restore": 796800, "time_since_restore": 66999.36083936691, "time_this_iter_s": 92.7535879611969, "iterations_since_restore": 664}
+{"timesteps_total": 798000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 86016.806, "num_steps_sampled": 798000, "update_time_ms": 2.522, "num_steps_trained": 798000, "load_time_ms": 0.624, "default": {"kl": 0.011667725630104542, "cur_lr": 4.999999873689376e-05, "entropy": 6.511973857879639, "total_loss": 17.17188262939453, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11145953088998795, "vf_explained_var": 0.9879921674728394, "vf_loss": 17.271528244018555}, "grad_time_ms": 770.477}, "pid": 3934253, "time_total_s": 67094.38917398453, "episode_reward_mean": -151.15047415516864, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -159.77875482028378, "policy_reward_mean": {}, "episodes_total": 15960, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.46980255395553, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-14-55", "training_iteration": 665, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756538095, "episode_len_mean": 50.0, "timesteps_since_restore": 798000, "time_since_restore": 67094.38917398453, "time_this_iter_s": 95.02833461761475, "iterations_since_restore": 665}
+{"timesteps_total": 799200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 87249.057, "num_steps_sampled": 799200, "update_time_ms": 2.486, "num_steps_trained": 799200, "load_time_ms": 0.627, "default": {"kl": 0.012102197855710983, "cur_lr": 4.999999873689376e-05, "entropy": 6.43237829208374, "total_loss": 7.126491069793701, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11670617014169693, "vf_explained_var": 0.994476318359375, "vf_loss": 7.23094367980957}, "grad_time_ms": 770.622}, "pid": 3934253, "time_total_s": 67178.83634185791, "episode_reward_mean": -151.30152287858394, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -159.77875482028378, "policy_reward_mean": {}, "episodes_total": 15984, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.46980255395553, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-16-20", "training_iteration": 666, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756538180, "episode_len_mean": 50.0, "timesteps_since_restore": 799200, "time_since_restore": 67178.83634185791, "time_this_iter_s": 84.44716787338257, "iterations_since_restore": 666}
+{"timesteps_total": 800400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 91353.758, "num_steps_sampled": 800400, "update_time_ms": 2.512, "num_steps_trained": 800400, "load_time_ms": 0.628, "default": {"kl": 0.011202414520084858, "cur_lr": 4.999999873689376e-05, "entropy": 6.938111782073975, "total_loss": 28.343528747558594, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11333584785461426, "vf_explained_var": 0.9814093112945557, "vf_loss": 28.44552230834961}, "grad_time_ms": 744.531}, "pid": 3934253, "time_total_s": 67278.0993475914, "episode_reward_mean": -151.50161338262103, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.1941954418082, "policy_reward_mean": {}, "episodes_total": 16008, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.46980255395553, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-17-59", "training_iteration": 667, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756538279, "episode_len_mean": 50.0, "timesteps_since_restore": 800400, "time_since_restore": 67278.0993475914, "time_this_iter_s": 99.26300573348999, "iterations_since_restore": 667}
+{"timesteps_total": 801600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94170.663, "num_steps_sampled": 801600, "update_time_ms": 2.597, "num_steps_trained": 801600, "load_time_ms": 0.64, "default": {"kl": 0.013727385550737381, "cur_lr": 4.999999873689376e-05, "entropy": 6.836697578430176, "total_loss": 8.947104454040527, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.13056008517742157, "vf_explained_var": 0.9923450946807861, "vf_loss": 9.063766479492188}, "grad_time_ms": 728.366}, "pid": 3934253, "time_total_s": 67380.53780794144, "episode_reward_mean": -151.21565188693177, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.1941954418082, "policy_reward_mean": {}, "episodes_total": 16032, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.47529502612474, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-19-41", "training_iteration": 668, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756538381, "episode_len_mean": 50.0, "timesteps_since_restore": 801600, "time_since_restore": 67380.53780794144, "time_this_iter_s": 102.43846035003662, "iterations_since_restore": 668}
+{"timesteps_total": 802800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93225.319, "num_steps_sampled": 802800, "update_time_ms": 2.595, "num_steps_trained": 802800, "load_time_ms": 0.641, "default": {"kl": 0.012475317344069481, "cur_lr": 4.999999873689376e-05, "entropy": 6.533681392669678, "total_loss": 8.152522087097168, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11396972090005875, "vf_explained_var": 0.9932054877281189, "vf_loss": 8.253859519958496}, "grad_time_ms": 722.125}, "pid": 3934253, "time_total_s": 67487.17651033401, "episode_reward_mean": -151.30825228574636, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.1941954418082, "policy_reward_mean": {}, "episodes_total": 16056, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.47529502612474, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-21-28", "training_iteration": 669, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756538488, "episode_len_mean": 50.0, "timesteps_since_restore": 802800, "time_since_restore": 67487.17651033401, "time_this_iter_s": 106.63870239257812, "iterations_since_restore": 669}
+{"timesteps_total": 804000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93995.465, "num_steps_sampled": 804000, "update_time_ms": 2.617, "num_steps_trained": 804000, "load_time_ms": 0.64, "default": {"kl": 0.012943493202328682, "cur_lr": 4.999999873689376e-05, "entropy": 6.564505100250244, "total_loss": 6.670261383056641, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11613664031028748, "vf_explained_var": 0.9946820139884949, "vf_loss": 6.773292541503906}, "grad_time_ms": 719.747}, "pid": 3934253, "time_total_s": 67586.34124970436, "episode_reward_mean": -151.18661510618227, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.1941954418082, "policy_reward_mean": {}, "episodes_total": 16080, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.47529502612474, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-23-07", "training_iteration": 670, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756538587, "episode_len_mean": 50.0, "timesteps_since_restore": 804000, "time_since_restore": 67586.34124970436, "time_this_iter_s": 99.16473937034607, "iterations_since_restore": 670}
+{"timesteps_total": 805200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94262.434, "num_steps_sampled": 805200, "update_time_ms": 2.627, "num_steps_trained": 805200, "load_time_ms": 0.641, "default": {"kl": 0.01226672250777483, "cur_lr": 4.999999873689376e-05, "entropy": 6.056154727935791, "total_loss": 8.908513069152832, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12133461982011795, "vf_explained_var": 0.9923565983772278, "vf_loss": 9.017428398132324}, "grad_time_ms": 718.622}, "pid": 3934253, "time_total_s": 67679.35307192802, "episode_reward_mean": -151.23372935764777, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.1941954418082, "policy_reward_mean": {}, "episodes_total": 16104, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.47529502612474, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-24-40", "training_iteration": 671, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756538680, "episode_len_mean": 50.0, "timesteps_since_restore": 805200, "time_since_restore": 67679.35307192802, "time_this_iter_s": 93.01182222366333, "iterations_since_restore": 671}
+{"timesteps_total": 806400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95903.582, "num_steps_sampled": 806400, "update_time_ms": 2.629, "num_steps_trained": 806400, "load_time_ms": 0.64, "default": {"kl": 0.013150524348020554, "cur_lr": 4.999999873689376e-05, "entropy": 6.534552574157715, "total_loss": 10.642876625061035, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12220504879951477, "vf_explained_var": 0.9922248125076294, "vf_loss": 10.751766204833984}, "grad_time_ms": 725.111}, "pid": 3934253, "time_total_s": 67781.31496477127, "episode_reward_mean": -151.34883223728843, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -163.48883436689144, "policy_reward_mean": {}, "episodes_total": 16128, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -144.58835193512377, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-26-22", "training_iteration": 672, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756538782, "episode_len_mean": 50.0, "timesteps_since_restore": 806400, "time_since_restore": 67781.31496477127, "time_this_iter_s": 101.96189284324646, "iterations_since_restore": 672}
+{"timesteps_total": 807600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 96976.531, "num_steps_sampled": 807600, "update_time_ms": 2.639, "num_steps_trained": 807600, "load_time_ms": 0.628, "default": {"kl": 0.012908346019685268, "cur_lr": 4.999999873689376e-05, "entropy": 6.433506011962891, "total_loss": 11.394515037536621, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11172091960906982, "vf_explained_var": 0.990711510181427, "vf_loss": 11.493165969848633}, "grad_time_ms": 745.565}, "pid": 3934253, "time_total_s": 67883.91454315186, "episode_reward_mean": -151.22890771549737, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -163.48883436689144, "policy_reward_mean": {}, "episodes_total": 16152, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.43140026304368, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-28-05", "training_iteration": 673, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756538885, "episode_len_mean": 50.0, "timesteps_since_restore": 807600, "time_since_restore": 67883.91454315186, "time_this_iter_s": 102.59957838058472, "iterations_since_restore": 673}
+{"timesteps_total": 808800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 98556.756, "num_steps_sampled": 808800, "update_time_ms": 2.654, "num_steps_trained": 808800, "load_time_ms": 0.634, "default": {"kl": 0.013200155459344387, "cur_lr": 4.999999873689376e-05, "entropy": 6.696569442749023, "total_loss": 7.806126117706299, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12211307138204575, "vf_explained_var": 0.9940935969352722, "vf_loss": 7.9148736000061035}, "grad_time_ms": 741.058}, "pid": 3934253, "time_total_s": 67992.4255001545, "episode_reward_mean": -151.68195875737942, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.95892603061685, "policy_reward_mean": {}, "episodes_total": 16176, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.43140026304368, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-29-53", "training_iteration": 674, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756538993, "episode_len_mean": 50.0, "timesteps_since_restore": 808800, "time_since_restore": 67992.4255001545, "time_this_iter_s": 108.51095700263977, "iterations_since_restore": 674}
+{"timesteps_total": 810000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 100854.048, "num_steps_sampled": 810000, "update_time_ms": 2.611, "num_steps_trained": 810000, "load_time_ms": 0.628, "default": {"kl": 0.011847835965454578, "cur_lr": 4.999999873689376e-05, "entropy": 6.6236677169799805, "total_loss": 12.153740882873535, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11479135602712631, "vf_explained_var": 0.9907311201095581, "vf_loss": 12.256536483764648}, "grad_time_ms": 750.617}, "pid": 3934253, "time_total_s": 68110.52215981483, "episode_reward_mean": -151.57957172871915, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -164.95892603061685, "policy_reward_mean": {}, "episodes_total": 16200, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.43140026304368, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-31-52", "training_iteration": 675, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756539112, "episode_len_mean": 50.0, "timesteps_since_restore": 810000, "time_since_restore": 68110.52215981483, "time_this_iter_s": 118.09665966033936, "iterations_since_restore": 675}
+{"timesteps_total": 811200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 102943.044, "num_steps_sampled": 811200, "update_time_ms": 2.666, "num_steps_trained": 811200, "load_time_ms": 0.647, "default": {"kl": 0.012600626796483994, "cur_lr": 4.999999873689376e-05, "entropy": 6.64918851852417, "total_loss": 16.147125244140625, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.14403095841407776, "vf_explained_var": 0.98751300573349, "vf_loss": 16.278398513793945}, "grad_time_ms": 744.666}, "pid": 3934253, "time_total_s": 68215.8010263443, "episode_reward_mean": -151.69943842256066, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -168.19080211933337, "policy_reward_mean": {}, "episodes_total": 16224, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.43140026304368, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-33-37", "training_iteration": 676, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756539217, "episode_len_mean": 50.0, "timesteps_since_restore": 811200, "time_since_restore": 68215.8010263443, "time_this_iter_s": 105.27886652946472, "iterations_since_restore": 676}
+{"timesteps_total": 812400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 101904.919, "num_steps_sampled": 812400, "update_time_ms": 2.604, "num_steps_trained": 812400, "load_time_ms": 0.647, "default": {"kl": 0.012451926246285439, "cur_lr": 4.999999873689376e-05, "entropy": 6.550738334655762, "total_loss": 8.950202941894531, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11671951413154602, "vf_explained_var": 0.9930202960968018, "vf_loss": 9.054315567016602}, "grad_time_ms": 767.029}, "pid": 3934253, "time_total_s": 68304.90540742874, "episode_reward_mean": -151.9255237656048, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -168.19080211933337, "policy_reward_mean": {}, "episodes_total": 16248, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.6557397808477, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-35-06", "training_iteration": 677, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756539306, "episode_len_mean": 50.0, "timesteps_since_restore": 812400, "time_since_restore": 68304.90540742874, "time_this_iter_s": 89.10438108444214, "iterations_since_restore": 677}
+{"timesteps_total": 813600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 100236.082, "num_steps_sampled": 813600, "update_time_ms": 2.596, "num_steps_trained": 813600, "load_time_ms": 0.634, "default": {"kl": 0.011645686812698841, "cur_lr": 4.999999873689376e-05, "entropy": 6.426385879516602, "total_loss": 25.895009994506836, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10874418914318085, "vf_explained_var": 0.9853691458702087, "vf_loss": 25.991962432861328}, "grad_time_ms": 781.1}, "pid": 3934253, "time_total_s": 68390.79599404335, "episode_reward_mean": -151.39686840499604, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -168.19080211933337, "policy_reward_mean": {}, "episodes_total": 16272, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.6557397808477, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-36-32", "training_iteration": 678, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756539392, "episode_len_mean": 50.0, "timesteps_since_restore": 813600, "time_since_restore": 68390.79599404335, "time_this_iter_s": 85.89058661460876, "iterations_since_restore": 678}
+{"timesteps_total": 814800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 100186.238, "num_steps_sampled": 814800, "update_time_ms": 2.597, "num_steps_trained": 814800, "load_time_ms": 0.633, "default": {"kl": 0.012054681777954102, "cur_lr": 4.999999873689376e-05, "entropy": 6.241293907165527, "total_loss": 9.844844818115234, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10538104176521301, "vf_explained_var": 0.9917342066764832, "vf_loss": 9.938020706176758}, "grad_time_ms": 786.948}, "pid": 3934253, "time_total_s": 68496.99503946304, "episode_reward_mean": -151.08533412484206, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -168.19080211933337, "policy_reward_mean": {}, "episodes_total": 16296, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.74999265829365, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-38-18", "training_iteration": 679, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756539498, "episode_len_mean": 50.0, "timesteps_since_restore": 814800, "time_since_restore": 68496.99503946304, "time_this_iter_s": 106.199045419693, "iterations_since_restore": 679}
+{"timesteps_total": 816000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 99076.14, "num_steps_sampled": 816000, "update_time_ms": 2.567, "num_steps_trained": 816000, "load_time_ms": 0.629, "default": {"kl": 0.012802320532500744, "cur_lr": 4.999999873689376e-05, "entropy": 6.331047534942627, "total_loss": 10.05265998840332, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11535504460334778, "vf_explained_var": 0.9921693801879883, "vf_loss": 10.155052185058594}, "grad_time_ms": 781.558}, "pid": 3934253, "time_total_s": 68585.0056154728, "episode_reward_mean": -150.79877807890264, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -162.53422148169122, "policy_reward_mean": {}, "episodes_total": 16320, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.74999265829365, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-39-46", "training_iteration": 680, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756539586, "episode_len_mean": 50.0, "timesteps_since_restore": 816000, "time_since_restore": 68585.0056154728, "time_this_iter_s": 88.01057600975037, "iterations_since_restore": 680}
+{"timesteps_total": 817200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 97339.498, "num_steps_sampled": 817200, "update_time_ms": 2.579, "num_steps_trained": 817200, "load_time_ms": 0.626, "default": {"kl": 0.012418713420629501, "cur_lr": 4.999999873689376e-05, "entropy": 6.361508846282959, "total_loss": 8.864505767822266, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12446033954620361, "vf_explained_var": 0.9929365515708923, "vf_loss": 8.976390838623047}, "grad_time_ms": 780.2}, "pid": 3934253, "time_total_s": 68660.64840269089, "episode_reward_mean": -150.8476379489054, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -163.65163740307503, "policy_reward_mean": {}, "episodes_total": 16344, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -140.52325129365028, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-41-02", "training_iteration": 681, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756539662, "episode_len_mean": 50.0, "timesteps_since_restore": 817200, "time_since_restore": 68660.64840269089, "time_this_iter_s": 75.64278721809387, "iterations_since_restore": 681}
+{"timesteps_total": 818400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95654.015, "num_steps_sampled": 818400, "update_time_ms": 2.58, "num_steps_trained": 818400, "load_time_ms": 0.625, "default": {"kl": 0.011963529512286186, "cur_lr": 4.999999873689376e-05, "entropy": 6.352933883666992, "total_loss": 14.166751861572266, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.12253758311271667, "vf_explained_var": 0.9886897206306458, "vf_loss": 14.277175903320312}, "grad_time_ms": 779.43}, "pid": 3934253, "time_total_s": 68745.74711084366, "episode_reward_mean": -150.81457327193596, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -163.87497421953273, "policy_reward_mean": {}, "episodes_total": 16368, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -140.52325129365028, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-42-27", "training_iteration": 682, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756539747, "episode_len_mean": 50.0, "timesteps_since_restore": 818400, "time_since_restore": 68745.74711084366, "time_this_iter_s": 85.098708152771, "iterations_since_restore": 682}
+{"timesteps_total": 819600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 96394.793, "num_steps_sampled": 819600, "update_time_ms": 2.6, "num_steps_trained": 819600, "load_time_ms": 0.642, "default": {"kl": 0.011911649256944656, "cur_lr": 4.999999873689376e-05, "entropy": 6.103545188903809, "total_loss": 9.033626556396484, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1012423187494278, "vf_explained_var": 0.9925553202629089, "vf_loss": 9.122809410095215}, "grad_time_ms": 785.224}, "pid": 3934253, "time_total_s": 68855.81179380417, "episode_reward_mean": -150.8460074407685, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -163.87497421953273, "policy_reward_mean": {}, "episodes_total": 16392, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -140.52325129365028, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-44-17", "training_iteration": 683, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756539857, "episode_len_mean": 50.0, "timesteps_since_restore": 819600, "time_since_restore": 68855.81179380417, "time_this_iter_s": 110.06468296051025, "iterations_since_restore": 683}
+{"timesteps_total": 820800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 96089.793, "num_steps_sampled": 820800, "update_time_ms": 2.585, "num_steps_trained": 820800, "load_time_ms": 0.646, "default": {"kl": 0.01197890192270279, "cur_lr": 4.999999873689376e-05, "entropy": 6.369986534118652, "total_loss": 9.048433303833008, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11695381999015808, "vf_explained_var": 0.9941478371620178, "vf_loss": 9.15325927734375}, "grad_time_ms": 779.6}, "pid": 3934253, "time_total_s": 68961.21634984016, "episode_reward_mean": -151.20101435769047, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -163.87497421953273, "policy_reward_mean": {}, "episodes_total": 16416, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -140.52325129365028, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-46-02", "training_iteration": 684, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756539962, "episode_len_mean": 50.0, "timesteps_since_restore": 820800, "time_since_restore": 68961.21634984016, "time_this_iter_s": 105.40455603599548, "iterations_since_restore": 684}
+{"timesteps_total": 822000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 95277.527, "num_steps_sampled": 822000, "update_time_ms": 2.612, "num_steps_trained": 822000, "load_time_ms": 0.643, "default": {"kl": 0.012461802922189236, "cur_lr": 4.999999873689376e-05, "entropy": 6.272339344024658, "total_loss": 5.204405784606934, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11976167559623718, "vf_explained_var": 0.9955686926841736, "vf_loss": 5.311550617218018}, "grad_time_ms": 781.68}, "pid": 3934253, "time_total_s": 69071.21107387543, "episode_reward_mean": -151.26061614676337, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -163.87497421953273, "policy_reward_mean": {}, "episodes_total": 16440, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.8100592191962, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-47-52", "training_iteration": 685, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756540072, "episode_len_mean": 50.0, "timesteps_since_restore": 822000, "time_since_restore": 69071.21107387543, "time_this_iter_s": 109.99472403526306, "iterations_since_restore": 685}
+{"timesteps_total": 823200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 92447.797, "num_steps_sampled": 823200, "update_time_ms": 2.543, "num_steps_trained": 823200, "load_time_ms": 0.629, "default": {"kl": 0.01256785448640585, "cur_lr": 4.999999873689376e-05, "entropy": 6.144095420837402, "total_loss": 8.30500602722168, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11857112497091293, "vf_explained_var": 0.9932448863983154, "vf_loss": 8.410853385925293}, "grad_time_ms": 787.828}, "pid": 3934253, "time_total_s": 69148.25281834602, "episode_reward_mean": -151.32705938647703, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -163.87497421953273, "policy_reward_mean": {}, "episodes_total": 16464, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.57374849668588, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-49-09", "training_iteration": 686, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756540149, "episode_len_mean": 50.0, "timesteps_since_restore": 823200, "time_since_restore": 69148.25281834602, "time_this_iter_s": 77.04174447059631, "iterations_since_restore": 686}
+{"timesteps_total": 824400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 86656.142, "num_steps_sampled": 824400, "update_time_ms": 2.651, "num_steps_trained": 824400, "load_time_ms": 0.625, "default": {"kl": 0.013023233972489834, "cur_lr": 4.999999873689376e-05, "entropy": 6.194946765899658, "total_loss": 7.450124740600586, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11979203671216965, "vf_explained_var": 0.9941511154174805, "vf_loss": 7.556732177734375}, "grad_time_ms": 776.974}, "pid": 3934253, "time_total_s": 69179.33352923393, "episode_reward_mean": -151.38487842314197, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -162.7455905758164, "policy_reward_mean": {}, "episodes_total": 16488, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.57374849668588, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-49-41", "training_iteration": 687, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756540181, "episode_len_mean": 50.0, "timesteps_since_restore": 824400, "time_since_restore": 69179.33352923393, "time_this_iter_s": 31.080710887908936, "iterations_since_restore": 687}
+{"timesteps_total": 825600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 87077.504, "num_steps_sampled": 825600, "update_time_ms": 2.579, "num_steps_trained": 825600, "load_time_ms": 0.655, "default": {"kl": 0.012215284630656242, "cur_lr": 4.999999873689376e-05, "entropy": 6.197381496429443, "total_loss": 6.406160354614258, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11759992688894272, "vf_explained_var": 0.9950463771820068, "vf_loss": 6.511392116546631}, "grad_time_ms": 767.738}, "pid": 3934253, "time_total_s": 69269.34405446053, "episode_reward_mean": -151.17998303169554, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -161.4520066765237, "policy_reward_mean": {}, "episodes_total": 16512, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.57374849668588, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-51-11", "training_iteration": 688, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756540271, "episode_len_mean": 50.0, "timesteps_since_restore": 825600, "time_since_restore": 69269.34405446053, "time_this_iter_s": 90.01052522659302, "iterations_since_restore": 688}
+{"timesteps_total": 826800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 85577.639, "num_steps_sampled": 826800, "update_time_ms": 2.554, "num_steps_trained": 826800, "load_time_ms": 0.653, "default": {"kl": 0.012390440329909325, "cur_lr": 4.999999873689376e-05, "entropy": 6.316177845001221, "total_loss": 18.18695640563965, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11650380492210388, "vf_explained_var": 0.9914601445198059, "vf_loss": 18.290916442871094}, "grad_time_ms": 778.028}, "pid": 3934253, "time_total_s": 69360.6469142437, "episode_reward_mean": -151.49063416896567, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -165.0456315643184, "policy_reward_mean": {}, "episodes_total": 16536, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -142.57374849668588, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-52-42", "training_iteration": 689, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756540362, "episode_len_mean": 50.0, "timesteps_since_restore": 826800, "time_since_restore": 69360.6469142437, "time_this_iter_s": 91.30285978317261, "iterations_since_restore": 689}
+{"timesteps_total": 828000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 86750.332, "num_steps_sampled": 828000, "update_time_ms": 2.59, "num_steps_trained": 828000, "load_time_ms": 0.658, "default": {"kl": 0.011441261507570744, "cur_lr": 4.999999873689376e-05, "entropy": 6.2853240966796875, "total_loss": 22.910051345825195, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11486013978719711, "vf_explained_var": 0.9828611016273499, "vf_loss": 23.013328552246094}, "grad_time_ms": 770.52}, "pid": 3934253, "time_total_s": 69460.30961084366, "episode_reward_mean": -151.7659706244583, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -180.0312363975615, "policy_reward_mean": {}, "episodes_total": 16560, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -147.80484426119497, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-54-22", "training_iteration": 690, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756540462, "episode_len_mean": 50.0, "timesteps_since_restore": 828000, "time_since_restore": 69460.30961084366, "time_this_iter_s": 99.66269659996033, "iterations_since_restore": 690}
+{"timesteps_total": 829200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 90554.163, "num_steps_sampled": 829200, "update_time_ms": 2.575, "num_steps_trained": 829200, "load_time_ms": 0.659, "default": {"kl": 0.012882929295301437, "cur_lr": 4.999999873689376e-05, "entropy": 6.380704879760742, "total_loss": 14.827506065368652, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11432640999555588, "vf_explained_var": 0.9901928305625916, "vf_loss": 14.928787231445312}, "grad_time_ms": 765.682}, "pid": 3934253, "time_total_s": 69573.93123292923, "episode_reward_mean": -151.96510791490692, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -180.0312363975615, "policy_reward_mean": {}, "episodes_total": 16584, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -146.67734841385254, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-56-15", "training_iteration": 691, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756540575, "episode_len_mean": 50.0, "timesteps_since_restore": 829200, "time_since_restore": 69573.93123292923, "time_this_iter_s": 113.62162208557129, "iterations_since_restore": 691}
+{"timesteps_total": 830400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94301.75, "num_steps_sampled": 830400, "update_time_ms": 2.544, "num_steps_trained": 830400, "load_time_ms": 0.653, "default": {"kl": 0.013330933637917042, "cur_lr": 4.999999873689376e-05, "entropy": 6.488500595092773, "total_loss": 25.625591278076172, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11284230649471283, "vf_explained_var": 0.9840977191925049, "vf_loss": 25.724937438964844}, "grad_time_ms": 729.438}, "pid": 3934253, "time_total_s": 69696.14327788353, "episode_reward_mean": -152.45992568128085, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -180.0312363975615, "policy_reward_mean": {}, "episodes_total": 16608, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -146.67734841385254, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_09-58-17", "training_iteration": 692, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756540697, "episode_len_mean": 50.0, "timesteps_since_restore": 830400, "time_since_restore": 69696.14327788353, "time_this_iter_s": 122.21204495429993, "iterations_since_restore": 692}
+{"timesteps_total": 831600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 93609.137, "num_steps_sampled": 831600, "update_time_ms": 2.539, "num_steps_trained": 831600, "load_time_ms": 0.641, "default": {"kl": 0.013981933705508709, "cur_lr": 4.999999873689376e-05, "entropy": 6.311878681182861, "total_loss": 6.803781986236572, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11857353150844574, "vf_explained_var": 0.9953944087028503, "vf_loss": 6.908199310302734}, "grad_time_ms": 718.791}, "pid": 3934253, "time_total_s": 69799.17583036423, "episode_reward_mean": -152.2516817528202, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -180.0312363975615, "policy_reward_mean": {}, "episodes_total": 16632, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -146.67734841385254, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_10-00-01", "training_iteration": 693, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756540801, "episode_len_mean": 50.0, "timesteps_since_restore": 831600, "time_since_restore": 69799.17583036423, "time_this_iter_s": 103.03255248069763, "iterations_since_restore": 693}
+{"timesteps_total": 832800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 96258.397, "num_steps_sampled": 832800, "update_time_ms": 2.525, "num_steps_trained": 832800, "load_time_ms": 0.646, "default": {"kl": 0.011710396967828274, "cur_lr": 4.999999873689376e-05, "entropy": 6.321974754333496, "total_loss": 7.118447303771973, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11277797818183899, "vf_explained_var": 0.9942653179168701, "vf_loss": 7.219368934631348}, "grad_time_ms": 704.768}, "pid": 3934253, "time_total_s": 69930.9327340126, "episode_reward_mean": -152.43928058541786, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -180.0312363975615, "policy_reward_mean": {}, "episodes_total": 16656, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -146.67734841385254, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_10-02-12", "training_iteration": 694, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756540932, "episode_len_mean": 50.0, "timesteps_since_restore": 832800, "time_since_restore": 69930.9327340126, "time_this_iter_s": 131.75690364837646, "iterations_since_restore": 694}
+{"timesteps_total": 834000, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 94101.325, "num_steps_sampled": 834000, "update_time_ms": 2.504, "num_steps_trained": 834000, "load_time_ms": 0.643, "default": {"kl": 0.009334594011306763, "cur_lr": 4.999999873689376e-05, "entropy": 6.432497501373291, "total_loss": 42.389686584472656, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.0987766683101654, "vf_explained_var": 0.9710657596588135, "vf_loss": 42.479007720947266}, "grad_time_ms": 674.781}, "pid": 3934253, "time_total_s": 70019.05616569519, "episode_reward_mean": -152.5862841135043, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -215.12316385063616, "policy_reward_mean": {}, "episodes_total": 16680, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.70947457469018, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_10-03-40", "training_iteration": 695, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756541020, "episode_len_mean": 50.0, "timesteps_since_restore": 834000, "time_since_restore": 70019.05616569519, "time_this_iter_s": 88.12343168258667, "iterations_since_restore": 695}
+{"timesteps_total": 835200, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 97703.421, "num_steps_sampled": 835200, "update_time_ms": 2.461, "num_steps_trained": 835200, "load_time_ms": 0.642, "default": {"kl": 0.010365894995629787, "cur_lr": 4.999999873689376e-05, "entropy": 6.277856349945068, "total_loss": 31.63107681274414, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.10792961716651917, "vf_explained_var": 0.9769017696380615, "vf_loss": 31.72850799560547}, "grad_time_ms": 674.391}, "pid": 3934253, "time_total_s": 70132.1143321991, "episode_reward_mean": -152.69711119289224, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -215.12316385063616, "policy_reward_mean": {}, "episodes_total": 16704, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.70947457469018, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_10-05-33", "training_iteration": 696, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756541133, "episode_len_mean": 50.0, "timesteps_since_restore": 835200, "time_since_restore": 70132.1143321991, "time_this_iter_s": 113.05816650390625, "iterations_since_restore": 696}
+{"timesteps_total": 836400, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 100679.433, "num_steps_sampled": 836400, "update_time_ms": 2.366, "num_steps_trained": 836400, "load_time_ms": 0.642, "default": {"kl": 0.012433375231921673, "cur_lr": 4.999999873689376e-05, "entropy": 6.009059429168701, "total_loss": 14.474651336669922, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11254024505615234, "vf_explained_var": 0.9880602359771729, "vf_loss": 14.574604034423828}, "grad_time_ms": 689.482}, "pid": 3934253, "time_total_s": 70193.10514330864, "episode_reward_mean": -152.4112371065005, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -215.12316385063616, "policy_reward_mean": {}, "episodes_total": 16728, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.70947457469018, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_10-06-34", "training_iteration": 697, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756541194, "episode_len_mean": 50.0, "timesteps_since_restore": 836400, "time_since_restore": 70193.10514330864, "time_this_iter_s": 60.99081110954285, "iterations_since_restore": 697}
+{"timesteps_total": 837600, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 102270.979, "num_steps_sampled": 837600, "update_time_ms": 2.433, "num_steps_trained": 837600, "load_time_ms": 0.613, "default": {"kl": 0.012350209057331085, "cur_lr": 4.999999873689376e-05, "entropy": 6.168177127838135, "total_loss": 10.318385124206543, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.1254514455795288, "vf_explained_var": 0.9916518330574036, "vf_loss": 10.431331634521484}, "grad_time_ms": 692.615}, "pid": 3934253, "time_total_s": 70299.0635895729, "episode_reward_mean": -152.33370792697596, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -215.12316385063616, "policy_reward_mean": {}, "episodes_total": 16752, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -139.70947457469018, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_10-08-20", "training_iteration": 698, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756541300, "episode_len_mean": 50.0, "timesteps_since_restore": 837600, "time_since_restore": 70299.0635895729, "time_this_iter_s": 105.95844626426697, "iterations_since_restore": 698}
+{"timesteps_total": 838800, "experiment_id": "28bf8d7c89244732ac12356321e5be58", "done": false, "info": {"sample_time_ms": 101079.944, "num_steps_sampled": 838800, "update_time_ms": 2.454, "num_steps_trained": 838800, "load_time_ms": 0.619, "default": {"kl": 0.012040354311466217, "cur_lr": 4.999999873689376e-05, "entropy": 6.06229305267334, "total_loss": 10.639694213867188, "cur_kl_coeff": 1.0125000476837158, "policy_loss": -0.11114271730184555, "vf_explained_var": 0.991613507270813, "vf_loss": 10.738645553588867}, "grad_time_ms": 687.555}, "pid": 3934253, "time_total_s": 70378.40663385391, "episode_reward_mean": -152.0317558586114, "hostname": "cda-server-6", "episodes_this_iter": 24, "episode_reward_min": -186.7766576967727, "policy_reward_mean": {}, "episodes_total": 16776, "node_ip": "10.157.146.6", "custom_metrics": {}, "episode_reward_max": -141.7210758642898, "num_metric_batches_dropped": 0, "timesteps_this_iter": 1200, "date": "2025-08-30_10-09-40", "training_iteration": 699, "config": {"compress_observations": false, "use_gae": true, "num_envs_per_worker": 1, "straggler_mitigation": false, "input_evaluation": null, "entropy_coeff": 0.0, "output_max_file_size": 67108864, "vf_share_layers": false, "env_config": {"generalize": true, "run_valid": false}, "batch_mode": "truncate_episodes", "vf_loss_coeff": 1.0, "observation_filter": "MeanStdFilter", "preprocessor_pref": "deepmind", "multiagent": {"policy_graphs": {}, "policy_mapping_fn": null, "policies_to_train": null}, "lambda": 1.0, "gamma": 0.99, "num_cpus_for_driver": 1, "log_level": "INFO", "num_cpus_per_worker": 1, "clip_actions": true, "synchronize_filters": true, "sample_batch_size": 200, "monitor": false, "sample_async": false, "num_workers": 6, "num_sgd_iter": 30, "postprocess_inputs": false, "num_gpus_per_worker": 0, "clip_param": 0.3, "env": "LEDRO_D_FC", "vf_clip_param": 10.0, "grad_clip": null, "lr": 5e-05, "simple_optimizer": false, "kl_target": 0.01, "optimizer": {}, "tf_session_args": 
{"log_device_placement": false, "gpu_options": {"allow_growth": true}, "intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "sgd_minibatch_size": 128, "horizon": 50, "model": {"fcnet_hiddens": [128, 128, 128], "grayscale": false, "zero_mean": true, "custom_preprocessor": null, "fcnet_activation": "tanh", "framestack": true, "free_log_std": false, "conv_activation": "relu", "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "dim": 84, "custom_options": {}, "use_lstm": false, "squash_to_range": false, "conv_filters": null, "custom_model": null}, "lr_schedule": null, "local_evaluator_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "input": "sampler", "output": null, "kl_coeff": 0.2, "clip_rewards": null, "collect_metrics_timeout": 180, "callbacks": {"on_train_result": null, "on_sample_end": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "num_gpus": 0, "custom_resources_per_worker": {}, "train_batch_size": 1200, "output_compress_columns": ["obs", "new_obs"]}, "timestamp": 1756541380, "episode_len_mean": 50.0, "timesteps_since_restore": 838800, "time_since_restore": 70378.40663385391, "time_this_iter_s": 79.34304428100586, "iterations_since_restore": 699}
diff --git a/experiments/ledro_d_fc_7nm_run4_horizon_50_range_10_400_400_start_33/README.md b/experiments/ledro_d_fc_7nm_run4_horizon_50_range_10_400_400_start_33/README.md
new file mode 100644
index 0000000..65c0ffd
--- /dev/null
+++ b/experiments/ledro_d_fc_7nm_run4_horizon_50_range_10_400_400_start_33/README.md
@@ -0,0 +1,2 @@
+The maximum ray/tune/episode_reward_max reached is only -135.7 for a horizon length of 50.
+-> Thus the average FoM per step is about -135.7 / 50 = -2.71.
\ No newline at end of file
diff --git a/experiments/ledro_d_fc_7nm_run4_horizon_50_range_10_400_400_start_33/image.png b/experiments/ledro_d_fc_7nm_run4_horizon_50_range_10_400_400_start_33/image.png
new file mode 100644
index 0000000..8a298a5
Binary files /dev/null and b/experiments/ledro_d_fc_7nm_run4_horizon_50_range_10_400_400_start_33/image.png differ
diff --git a/experiments/optimize-Zhenxin_S_FC_65nmPTM-run14/PPO_Zhenxin_S_FC_0_2025-09-04_16-10-519x116nc6/ckt_910/checkpoint-910 b/experiments/optimize-Zhenxin_S_FC_65nmPTM-run14/PPO_Zhenxin_S_FC_0_2025-09-04_16-10-519x116nc6/ckt_910/checkpoint-910
new file mode 100644
index 0000000..462dec1
Binary files /dev/null and b/experiments/optimize-Zhenxin_S_FC_65nmPTM-run14/PPO_Zhenxin_S_FC_0_2025-09-04_16-10-519x116nc6/ckt_910/checkpoint-910 differ
diff --git a/experiments/optimize-Zhenxin_S_FC_65nmPTM-run14/PPO_Zhenxin_S_FC_0_2025-09-04_16-10-519x116nc6/ckt_910/checkpoint-910.tune_metadata b/experiments/optimize-Zhenxin_S_FC_65nmPTM-run14/PPO_Zhenxin_S_FC_0_2025-09-04_16-10-519x116nc6/ckt_910/checkpoint-910.tune_metadata
new file mode 100644
index 0000000..4239608
Binary files /dev/null and b/experiments/optimize-Zhenxin_S_FC_65nmPTM-run14/PPO_Zhenxin_S_FC_0_2025-09-04_16-10-519x116nc6/ckt_910/checkpoint-910.tune_metadata differ
diff --git a/experiments/optimize-Zhenxin_S_FC_65nmPTM-run14/PPO_Zhenxin_S_FC_0_2025-09-04_16-10-519x116nc6/error_2025-09-04_23-19-28.txt b/experiments/optimize-Zhenxin_S_FC_65nmPTM-run14/PPO_Zhenxin_S_FC_0_2025-09-04_16-10-519x116nc6/error_2025-09-04_23-19-28.txt
new file mode 100644
index 0000000..4fe1c09
--- /dev/null
+++ b/experiments/optimize-Zhenxin_S_FC_65nmPTM-run14/PPO_Zhenxin_S_FC_0_2025-09-04_16-10-519x116nc6/error_2025-09-04_23-19-28.txt
@@ -0,0 +1,52 @@
+Traceback (most recent call last):
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/tune/trial_runner.py", line 378, in _process_events
+ result = self.trial_executor.fetch_result(trial)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/tune/ray_trial_executor.py", line 228, in fetch_result
+ result = ray.get(trial_future[0])
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/worker.py", line 2132, in get
+ raise value
+ray.worker.RayTaskError: [36mray_worker[39m (pid=3651948, host=cda-server-2)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/agents/agent.py", line 284, in train
+ result = Trainable.train(self)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/tune/trainable.py", line 151, in train
+ result = self._train()
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/agents/ppo/ppo.py", line 103, in _train
+ fetches = self.optimizer.step()
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/optimizers/multi_gpu_optimizer.py", line 125, in step
+ self.num_envs_per_worker, self.train_batch_size)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/optimizers/rollout.py", line 28, in collect_samples
+ next_sample = ray.get(fut_sample)
+ray.worker.RayTaskError: [36mray_worker[39m (pid=3651954, host=cda-server-2)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/evaluation/policy_evaluator.py", line 368, in sample
+ batches = [self.input_reader.next()]
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/offline/input_reader.py", line 31, in next
+ batches = [self.sampler.get_data()]
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/evaluation/sampler.py", line 65, in get_data
+ item = next(self.rollout_provider)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/evaluation/sampler.py", line 267, in _env_runner
+ preprocessors, obs_filters, unroll_length, pack, callbacks)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/evaluation/sampler.py", line 403, in _process_observations
+ resetted_obs = base_env.try_reset(env_id)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/env/base_env.py", line 257, in try_reset
+ return {_DUMMY_AGENT_ID: self.vector_env.reset_at(env_id)}
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/env/vector_env.py", line 88, in reset_at
+ return self.envs[index].reset()
+ File "/home/pham/code/analog-ml/AutoCkt-optimize-Zhenxin_S_FC_65nmPTM-run14/autockt/envs/ngspice_zhenxin_s_fc.py", line 344, in reset
+ self.cur_specs = self.update(self.cur_params_idx)
+ File "/home/pham/code/analog-ml/AutoCkt-optimize-Zhenxin_S_FC_65nmPTM-run14/autockt/envs/ngspice_zhenxin_s_fc.py", line 496, in update
+ self.sim_env.create_design_and_simulate(param_val[0])[1].items(),
+ File "/home/pham/code/analog-ml/AutoCkt-optimize-Zhenxin_S_FC_65nmPTM-run14/eval_engines/ngspice/ngspice_wrapper.py", line 133, in create_design_and_simulate
+ specs = self.translate_result(design_folder)
+ File "/home/pham/code/analog-ml/AutoCkt-optimize-Zhenxin_S_FC_65nmPTM-run14/eval_engines/ngspice/Zhenxin_S_FC.py", line 29, in translate_result
+ freq, vout, ibias = self.parse_output(output_path)
+ File "/home/pham/code/analog-ml/AutoCkt-optimize-Zhenxin_S_FC_65nmPTM-run14/eval_engines/ngspice/Zhenxin_S_FC.py", line 46, in parse_output
+ ac_raw_outputs = np.genfromtxt(ac_fname, skip_header=1)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/numpy/lib/npyio.py", line 1744, in genfromtxt
+ fhd = iter(np.lib._datasource.open(fname, 'rt', encoding=encoding))
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/numpy/lib/_datasource.py", line 266, in open
+ return ds.open(path, mode, encoding=encoding, newline=newline)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/numpy/lib/_datasource.py", line 624, in open
+ raise IOError("%s not found." % path)
+OSError: /tmp/ckt_da/designs_Zhenxin_S_FC/Zhenxin_S_FC_242.843_133.808_292.648_198.925_139.809_131.131_0.657_0.178_0.686_0.305_1.02109/ac.csv not found.
+
+
diff --git a/experiments/optimize-Zhenxin_S_FC_65nmPTM-run14/PPO_Zhenxin_S_FC_0_2025-09-04_16-10-519x116nc6/error_2025-09-05_00-32-02.txt b/experiments/optimize-Zhenxin_S_FC_65nmPTM-run14/PPO_Zhenxin_S_FC_0_2025-09-04_16-10-519x116nc6/error_2025-09-05_00-32-02.txt
new file mode 100644
index 0000000..d719bcc
--- /dev/null
+++ b/experiments/optimize-Zhenxin_S_FC_65nmPTM-run14/PPO_Zhenxin_S_FC_0_2025-09-04_16-10-519x116nc6/error_2025-09-05_00-32-02.txt
@@ -0,0 +1,52 @@
+Traceback (most recent call last):
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/tune/trial_runner.py", line 378, in _process_events
+ result = self.trial_executor.fetch_result(trial)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/tune/ray_trial_executor.py", line 228, in fetch_result
+ result = ray.get(trial_future[0])
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/worker.py", line 2132, in get
+ raise value
+ray.worker.RayTaskError: [36mray_worker[39m (pid=3651947, host=cda-server-2)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/agents/agent.py", line 284, in train
+ result = Trainable.train(self)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/tune/trainable.py", line 151, in train
+ result = self._train()
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/agents/ppo/ppo.py", line 103, in _train
+ fetches = self.optimizer.step()
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/optimizers/multi_gpu_optimizer.py", line 125, in step
+ self.num_envs_per_worker, self.train_batch_size)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/optimizers/rollout.py", line 28, in collect_samples
+ next_sample = ray.get(fut_sample)
+ray.worker.RayTaskError: [36mray_worker[39m (pid=3651950, host=cda-server-2)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/evaluation/policy_evaluator.py", line 368, in sample
+ batches = [self.input_reader.next()]
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/offline/input_reader.py", line 31, in next
+ batches = [self.sampler.get_data()]
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/evaluation/sampler.py", line 65, in get_data
+ item = next(self.rollout_provider)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/evaluation/sampler.py", line 267, in _env_runner
+ preprocessors, obs_filters, unroll_length, pack, callbacks)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/evaluation/sampler.py", line 403, in _process_observations
+ resetted_obs = base_env.try_reset(env_id)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/env/base_env.py", line 257, in try_reset
+ return {_DUMMY_AGENT_ID: self.vector_env.reset_at(env_id)}
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/env/vector_env.py", line 88, in reset_at
+ return self.envs[index].reset()
+ File "/home/pham/code/analog-ml/AutoCkt-optimize-Zhenxin_S_FC_65nmPTM-run14/autockt/envs/ngspice_zhenxin_s_fc.py", line 344, in reset
+ self.cur_specs = self.update(self.cur_params_idx)
+ File "/home/pham/code/analog-ml/AutoCkt-optimize-Zhenxin_S_FC_65nmPTM-run14/autockt/envs/ngspice_zhenxin_s_fc.py", line 496, in update
+ self.sim_env.create_design_and_simulate(param_val[0])[1].items(),
+ File "/home/pham/code/analog-ml/AutoCkt-optimize-Zhenxin_S_FC_65nmPTM-run14/eval_engines/ngspice/ngspice_wrapper.py", line 133, in create_design_and_simulate
+ specs = self.translate_result(design_folder)
+ File "/home/pham/code/analog-ml/AutoCkt-optimize-Zhenxin_S_FC_65nmPTM-run14/eval_engines/ngspice/Zhenxin_S_FC.py", line 29, in translate_result
+ freq, vout, ibias = self.parse_output(output_path)
+ File "/home/pham/code/analog-ml/AutoCkt-optimize-Zhenxin_S_FC_65nmPTM-run14/eval_engines/ngspice/Zhenxin_S_FC.py", line 46, in parse_output
+ ac_raw_outputs = np.genfromtxt(ac_fname, skip_header=1)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/numpy/lib/npyio.py", line 1744, in genfromtxt
+ fhd = iter(np.lib._datasource.open(fname, 'rt', encoding=encoding))
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/numpy/lib/_datasource.py", line 266, in open
+ return ds.open(path, mode, encoding=encoding, newline=newline)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/numpy/lib/_datasource.py", line 624, in open
+ raise IOError("%s not found." % path)
+OSError: /tmp/ckt_da/designs_Zhenxin_S_FC/Zhenxin_S_FC_242.843_133.808_292.648_198.925_139.809_131.131_0.657_0.178_0.686_0.305_1.09580/ac.csv not found.
+
+
diff --git a/experiments/optimize-Zhenxin_S_FC_65nmPTM-run14/PPO_Zhenxin_S_FC_0_2025-09-04_16-10-519x116nc6/error_2025-09-05_01-18-02.txt b/experiments/optimize-Zhenxin_S_FC_65nmPTM-run14/PPO_Zhenxin_S_FC_0_2025-09-04_16-10-519x116nc6/error_2025-09-05_01-18-02.txt
new file mode 100644
index 0000000..ed8455b
--- /dev/null
+++ b/experiments/optimize-Zhenxin_S_FC_65nmPTM-run14/PPO_Zhenxin_S_FC_0_2025-09-04_16-10-519x116nc6/error_2025-09-05_01-18-02.txt
@@ -0,0 +1,52 @@
+Traceback (most recent call last):
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/tune/trial_runner.py", line 378, in _process_events
+ result = self.trial_executor.fetch_result(trial)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/tune/ray_trial_executor.py", line 228, in fetch_result
+ result = ray.get(trial_future[0])
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/worker.py", line 2132, in get
+ raise value
+ray.worker.RayTaskError: [36mray_worker[39m (pid=3651946, host=cda-server-2)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/agents/agent.py", line 284, in train
+ result = Trainable.train(self)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/tune/trainable.py", line 151, in train
+ result = self._train()
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/agents/ppo/ppo.py", line 103, in _train
+ fetches = self.optimizer.step()
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/optimizers/multi_gpu_optimizer.py", line 125, in step
+ self.num_envs_per_worker, self.train_batch_size)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/optimizers/rollout.py", line 28, in collect_samples
+ next_sample = ray.get(fut_sample)
+ray.worker.RayTaskError: [36mray_worker[39m (pid=3651940, host=cda-server-2)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/evaluation/policy_evaluator.py", line 368, in sample
+ batches = [self.input_reader.next()]
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/offline/input_reader.py", line 31, in next
+ batches = [self.sampler.get_data()]
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/evaluation/sampler.py", line 65, in get_data
+ item = next(self.rollout_provider)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/evaluation/sampler.py", line 267, in _env_runner
+ preprocessors, obs_filters, unroll_length, pack, callbacks)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/evaluation/sampler.py", line 403, in _process_observations
+ resetted_obs = base_env.try_reset(env_id)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/env/base_env.py", line 257, in try_reset
+ return {_DUMMY_AGENT_ID: self.vector_env.reset_at(env_id)}
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/env/vector_env.py", line 88, in reset_at
+ return self.envs[index].reset()
+ File "/home/pham/code/analog-ml/AutoCkt-optimize-Zhenxin_S_FC_65nmPTM-run14/autockt/envs/ngspice_zhenxin_s_fc.py", line 344, in reset
+ self.cur_specs = self.update(self.cur_params_idx)
+ File "/home/pham/code/analog-ml/AutoCkt-optimize-Zhenxin_S_FC_65nmPTM-run14/autockt/envs/ngspice_zhenxin_s_fc.py", line 496, in update
+ self.sim_env.create_design_and_simulate(param_val[0])[1].items(),
+ File "/home/pham/code/analog-ml/AutoCkt-optimize-Zhenxin_S_FC_65nmPTM-run14/eval_engines/ngspice/ngspice_wrapper.py", line 133, in create_design_and_simulate
+ specs = self.translate_result(design_folder)
+ File "/home/pham/code/analog-ml/AutoCkt-optimize-Zhenxin_S_FC_65nmPTM-run14/eval_engines/ngspice/Zhenxin_S_FC.py", line 29, in translate_result
+ freq, vout, ibias = self.parse_output(output_path)
+ File "/home/pham/code/analog-ml/AutoCkt-optimize-Zhenxin_S_FC_65nmPTM-run14/eval_engines/ngspice/Zhenxin_S_FC.py", line 46, in parse_output
+ ac_raw_outputs = np.genfromtxt(ac_fname, skip_header=1)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/numpy/lib/npyio.py", line 1744, in genfromtxt
+ fhd = iter(np.lib._datasource.open(fname, 'rt', encoding=encoding))
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/numpy/lib/_datasource.py", line 266, in open
+ return ds.open(path, mode, encoding=encoding, newline=newline)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/numpy/lib/_datasource.py", line 624, in open
+ raise IOError("%s not found." % path)
+OSError: /tmp/ckt_da/designs_Zhenxin_S_FC/Zhenxin_S_FC_242.843_133.808_292.648_198.925_139.809_131.131_0.657_0.178_0.686_0.305_1.04127/ac.csv not found.
+
+
diff --git a/experiments/optimize-Zhenxin_S_FC_65nmPTM-run14/PPO_Zhenxin_S_FC_0_2025-09-04_16-10-519x116nc6/error_2025-09-05_03-08-18.txt b/experiments/optimize-Zhenxin_S_FC_65nmPTM-run14/PPO_Zhenxin_S_FC_0_2025-09-04_16-10-519x116nc6/error_2025-09-05_03-08-18.txt
new file mode 100644
index 0000000..e01c1dc
--- /dev/null
+++ b/experiments/optimize-Zhenxin_S_FC_65nmPTM-run14/PPO_Zhenxin_S_FC_0_2025-09-04_16-10-519x116nc6/error_2025-09-05_03-08-18.txt
@@ -0,0 +1,52 @@
+Traceback (most recent call last):
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/tune/trial_runner.py", line 378, in _process_events
+ result = self.trial_executor.fetch_result(trial)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/tune/ray_trial_executor.py", line 228, in fetch_result
+ result = ray.get(trial_future[0])
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/worker.py", line 2132, in get
+ raise value
+ray.worker.RayTaskError: [36mray_worker[39m (pid=3651949, host=cda-server-2)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/agents/agent.py", line 284, in train
+ result = Trainable.train(self)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/tune/trainable.py", line 151, in train
+ result = self._train()
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/agents/ppo/ppo.py", line 103, in _train
+ fetches = self.optimizer.step()
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/optimizers/multi_gpu_optimizer.py", line 125, in step
+ self.num_envs_per_worker, self.train_batch_size)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/optimizers/rollout.py", line 28, in collect_samples
+ next_sample = ray.get(fut_sample)
+ray.worker.RayTaskError: [36mray_worker[39m (pid=3651952, host=cda-server-2)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/evaluation/policy_evaluator.py", line 368, in sample
+ batches = [self.input_reader.next()]
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/offline/input_reader.py", line 31, in next
+ batches = [self.sampler.get_data()]
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/evaluation/sampler.py", line 65, in get_data
+ item = next(self.rollout_provider)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/evaluation/sampler.py", line 267, in _env_runner
+ preprocessors, obs_filters, unroll_length, pack, callbacks)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/evaluation/sampler.py", line 403, in _process_observations
+ resetted_obs = base_env.try_reset(env_id)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/env/base_env.py", line 257, in try_reset
+ return {_DUMMY_AGENT_ID: self.vector_env.reset_at(env_id)}
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/ray/rllib/env/vector_env.py", line 88, in reset_at
+ return self.envs[index].reset()
+ File "/home/pham/code/analog-ml/AutoCkt-optimize-Zhenxin_S_FC_65nmPTM-run14/autockt/envs/ngspice_zhenxin_s_fc.py", line 344, in reset
+ self.cur_specs = self.update(self.cur_params_idx)
+ File "/home/pham/code/analog-ml/AutoCkt-optimize-Zhenxin_S_FC_65nmPTM-run14/autockt/envs/ngspice_zhenxin_s_fc.py", line 496, in update
+ self.sim_env.create_design_and_simulate(param_val[0])[1].items(),
+ File "/home/pham/code/analog-ml/AutoCkt-optimize-Zhenxin_S_FC_65nmPTM-run14/eval_engines/ngspice/ngspice_wrapper.py", line 133, in create_design_and_simulate
+ specs = self.translate_result(design_folder)
+ File "/home/pham/code/analog-ml/AutoCkt-optimize-Zhenxin_S_FC_65nmPTM-run14/eval_engines/ngspice/Zhenxin_S_FC.py", line 29, in translate_result
+ freq, vout, ibias = self.parse_output(output_path)
+ File "/home/pham/code/analog-ml/AutoCkt-optimize-Zhenxin_S_FC_65nmPTM-run14/eval_engines/ngspice/Zhenxin_S_FC.py", line 46, in parse_output
+ ac_raw_outputs = np.genfromtxt(ac_fname, skip_header=1)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/numpy/lib/npyio.py", line 1744, in genfromtxt
+ fhd = iter(np.lib._datasource.open(fname, 'rt', encoding=encoding))
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/numpy/lib/_datasource.py", line 266, in open
+ return ds.open(path, mode, encoding=encoding, newline=newline)
+ File "/home/pham/anaconda3/envs/autockt/lib/python3.5/site-packages/numpy/lib/_datasource.py", line 624, in open
+ raise IOError("%s not found." % path)
+OSError: /tmp/ckt_da/designs_Zhenxin_S_FC/Zhenxin_S_FC_242.843_133.808_292.648_198.925_139.809_131.131_0.657_0.178_0.686_0.305_1.02397/ac.csv not found.
+
+
diff --git a/experiments/optimize-Zhenxin_S_FC_65nmPTM-run14/PPO_Zhenxin_S_FC_0_2025-09-04_16-10-519x116nc6/events.out.tfevents.1756995307.cda-server-2 b/experiments/optimize-Zhenxin_S_FC_65nmPTM-run14/PPO_Zhenxin_S_FC_0_2025-09-04_16-10-519x116nc6/events.out.tfevents.1756995307.cda-server-2
new file mode 100644
index 0000000..58947ff
Binary files /dev/null and b/experiments/optimize-Zhenxin_S_FC_65nmPTM-run14/PPO_Zhenxin_S_FC_0_2025-09-04_16-10-519x116nc6/events.out.tfevents.1756995307.cda-server-2 differ
diff --git a/experiments/optimize-Zhenxin_S_FC_65nmPTM-run14/PPO_Zhenxin_S_FC_0_2025-09-04_16-10-519x116nc6/params.json b/experiments/optimize-Zhenxin_S_FC_65nmPTM-run14/PPO_Zhenxin_S_FC_0_2025-09-04_16-10-519x116nc6/params.json
new file mode 100644
index 0000000..76563ea
--- /dev/null
+++ b/experiments/optimize-Zhenxin_S_FC_65nmPTM-run14/PPO_Zhenxin_S_FC_0_2025-09-04_16-10-519x116nc6/params.json
@@ -0,0 +1,18 @@
+{
+ "env": "",
+ "env_config": {
+ "generalize": false,
+ "run_valid": false
+ },
+ "horizon": 50,
+ "model": {
+ "fcnet_hiddens": [
+ 128,
+ 128,
+ 128
+ ]
+ },
+ "num_gpus": 0,
+ "num_workers": 3,
+ "train_batch_size": 1200
+}
\ No newline at end of file
diff --git a/experiments/optimize-Zhenxin_S_FC_65nmPTM-run14/PPO_Zhenxin_S_FC_0_2025-09-04_16-10-519x116nc6/params.pkl b/experiments/optimize-Zhenxin_S_FC_65nmPTM-run14/PPO_Zhenxin_S_FC_0_2025-09-04_16-10-519x116nc6/params.pkl
new file mode 100644
index 0000000..f6f5f94
Binary files /dev/null and b/experiments/optimize-Zhenxin_S_FC_65nmPTM-run14/PPO_Zhenxin_S_FC_0_2025-09-04_16-10-519x116nc6/params.pkl differ
diff --git a/experiments/optimize-Zhenxin_S_FC_65nmPTM-run14/PPO_Zhenxin_S_FC_0_2025-09-04_16-10-519x116nc6/progress.csv b/experiments/optimize-Zhenxin_S_FC_65nmPTM-run14/PPO_Zhenxin_S_FC_0_2025-09-04_16-10-519x116nc6/progress.csv
new file mode 100644
index 0000000..cc8498f
--- /dev/null
+++ b/experiments/optimize-Zhenxin_S_FC_65nmPTM-run14/PPO_Zhenxin_S_FC_0_2025-09-04_16-10-519x116nc6/progress.csv
@@ -0,0 +1,941 @@
+hostname,done,time_since_restore,info,training_iteration,config,time_this_iter_s,num_metric_batches_dropped,policy_reward_mean,experiment_id,node_ip,timesteps_this_iter,timesteps_since_restore,timesteps_total,custom_metrics,iterations_since_restore,episodes_this_iter,episode_reward_min,date,episode_reward_max,pid,timestamp,episode_reward_mean,time_total_s,episodes_total,episode_len_mean
+cda-server-2,False,140.99133276939392,"{'sample_time_ms': 139859.504, 'num_steps_trained': 1200, 'grad_time_ms': 653.542, 'default': {'cur_kl_coeff': 0.20000000298023224, 'vf_loss': 2231.562255859375, 'policy_loss': -0.11362738162279129, 'vf_explained_var': -0.0003447002964094281, 'entropy': 15.597346305847168, 'cur_lr': 4.999999873689376e-05, 'total_loss': 2231.454833984375, 'kl': 0.03053244948387146}, 'load_time_ms': 27.299, 'num_steps_sampled': 1200, 'update_time_ms': 416.916}",1,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",140.99133276939392,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,1200,1200,{},1,24,-100.0666019790363,2025-09-04_16-15-07,-93.22332074316793,3651948,1756995307,-97.30795660981228,140.99133276939392,24,50.0
+cda-server-2,False,187.5420961380005,"{'sample_time_ms': 92992.711, 'num_steps_trained': 2400, 'grad_time_ms': 534.807, 'default': {'cur_kl_coeff': 0.30000001192092896, 'vf_loss': 2008.263427734375, 'policy_loss': -0.11293138563632965, 'vf_explained_var': -0.0319129154086113, 'entropy': 15.605307579040527, 'cur_lr': 4.999999873689376e-05, 'total_loss': 2008.15966796875, 'kl': 0.030827680602669716}, 'load_time_ms': 14.011, 'num_steps_sampled': 2400, 'update_time_ms': 209.994}",2,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.55076336860657,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,2400,2400,{},2,24,-100.0666019790363,2025-09-04_16-15-53,-93.22332074316793,3651948,1756995353,-97.6085290053284,187.5420961380005,48,50.0
+cda-server-2,False,248.49070477485657,"{'sample_time_ms': 82189.195, 'num_steps_trained': 3600, 'grad_time_ms': 475.83, 'default': {'cur_kl_coeff': 0.44999995827674866, 'vf_loss': 1854.1104736328125, 'policy_loss': -0.10487513989210129, 'vf_explained_var': -0.018647870048880577, 'entropy': 15.596461296081543, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1854.0181884765625, 'kl': 0.02738937921822071}, 'load_time_ms': 9.619, 'num_steps_sampled': 3600, 'update_time_ms': 140.997}",3,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",60.94860863685608,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,3600,3600,{},3,24,-100.0666019790363,2025-09-04_16-16-54,-93.22332074316793,3651948,1756995414,-97.54624563833285,248.49070477485657,72,50.0
+cda-server-2,False,289.3633248806,"{'sample_time_ms': 71768.578, 'num_steps_trained': 4800, 'grad_time_ms': 446.461, 'default': {'cur_kl_coeff': 0.675000011920929, 'vf_loss': 1733.2108154296875, 'policy_loss': -0.11116102337837219, 'vf_explained_var': -0.05004839599132538, 'entropy': 15.587655067443848, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1733.1148681640625, 'kl': 0.022528911009430885}, 'load_time_ms': 7.375, 'num_steps_sampled': 4800, 'update_time_ms': 106.338}",4,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.87262010574341,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,4800,4800,{},4,24,-100.0666019790363,2025-09-04_16-17-35,-91.89653622755112,3651948,1756995455,-97.5726961111477,289.3633248806,96,50.0
+cda-server-2,False,329.5035173892975,"{'sample_time_ms': 65366.457, 'num_steps_trained': 6000, 'grad_time_ms': 432.08, 'default': {'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 1653.20263671875, 'policy_loss': -0.10911934822797775, 'vf_explained_var': -0.07681050896644592, 'entropy': 15.577970504760742, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1653.112548828125, 'kl': 0.01889631897211075}, 'load_time_ms': 6.065, 'num_steps_sampled': 6000, 'update_time_ms': 85.553}",5,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.14019250869751,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,6000,6000,{},5,24,-99.99385424763929,2025-09-04_16-18-15,-91.89653622755112,3651948,1756995495,-97.58844576213276,329.5035173892975,120,50.0
+cda-server-2,False,369.60118436813354,"{'sample_time_ms': 61089.625, 'num_steps_trained': 7200, 'grad_time_ms': 424.118, 'default': {'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 1622.42236328125, 'policy_loss': -0.0988093689084053, 'vf_explained_var': -0.13713043928146362, 'entropy': 15.560051918029785, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1622.34228515625, 'kl': 0.018548818305134773}, 'load_time_ms': 5.173, 'num_steps_sampled': 7200, 'update_time_ms': 71.689}",6,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.09766697883606,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,7200,7200,{},6,24,-99.99385424763929,2025-09-04_16-18-55,-91.89653622755112,3651948,1756995535,-97.54368201093162,369.60118436813354,144,50.0
+cda-server-2,False,409.36658096313477,"{'sample_time_ms': 57991.308, 'num_steps_trained': 8400, 'grad_time_ms': 414.365, 'default': {'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 1449.89404296875, 'policy_loss': -0.10638123005628586, 'vf_explained_var': -0.13925179839134216, 'entropy': 15.54902172088623, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1449.80712890625, 'kl': 0.019342221319675446}, 'load_time_ms': 4.528, 'num_steps_sampled': 8400, 'update_time_ms': 61.871}",7,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.76539659500122,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,8400,8400,{},7,24,-99.99385424763929,2025-09-04_16-19-35,-30.71669919267596,3651948,1756995575,-96.51648214196463,409.36658096313477,168,49.64
+cda-server-2,False,449.37567710876465,"{'sample_time_ms': 55695.786, 'num_steps_trained': 9600, 'grad_time_ms': 409.353, 'default': {'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 1406.4390869140625, 'policy_loss': -0.10367625206708908, 'vf_explained_var': -0.18408912420272827, 'entropy': 15.551528930664062, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1406.35302734375, 'kl': 0.01746782474219799}, 'load_time_ms': 4.044, 'num_steps_sampled': 9600, 'update_time_ms': 54.458}",8,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.00909614562988,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,9600,9600,{},8,24,-99.89664753970594,2025-09-04_16-20-15,-24.47619018840004,3651948,1756995615,-95.89876277925154,449.37567710876465,192,49.33
+cda-server-2,False,490.18978786468506,"{'sample_time_ms': 53999.365, 'num_steps_trained': 10800, 'grad_time_ms': 405.921, 'default': {'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 1334.082275390625, 'policy_loss': -0.10778095573186874, 'vf_explained_var': -0.20987066626548767, 'entropy': 15.519222259521484, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1333.9931640625, 'kl': 0.01847856305539608}, 'load_time_ms': 3.674, 'num_steps_sampled': 10800, 'update_time_ms': 48.693}",9,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.81411075592041,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,10800,10800,{},9,25,-99.89664753970594,2025-09-04_16-20-56,-23.04501059558644,3651948,1756995656,-95.2285475105528,490.18978786468506,217,49.01
+cda-server-2,False,530.0455119609833,"{'sample_time_ms': 52548.508, 'num_steps_trained': 12000, 'grad_time_ms': 401.092, 'default': {'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 1255.589599609375, 'policy_loss': -0.11320510506629944, 'vf_explained_var': -0.24970334768295288, 'entropy': 15.53376293182373, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1255.4962158203125, 'kl': 0.019503416493535042}, 'load_time_ms': 3.373, 'num_steps_sampled': 12000, 'update_time_ms': 44.053}",10,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.85572409629822,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,12000,12000,{},10,24,-99.89664753970594,2025-09-04_16-21-36,-23.04501059558644,3651948,1756995696,-94.32109097779768,530.0455119609833,241,48.7
+cda-server-2,False,569.8694930076599,"{'sample_time_ms': 42508.571, 'num_steps_trained': 13200, 'grad_time_ms': 371.291, 'default': {'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 1171.419677734375, 'policy_loss': -0.10418149828910828, 'vf_explained_var': -0.26429101824760437, 'entropy': 15.501246452331543, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1171.3345947265625, 'kl': 0.01891779899597168}, 'load_time_ms': 0.705, 'num_steps_sampled': 13200, 'update_time_ms': 2.679}",11,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.823981046676636,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,13200,13200,{},11,25,-99.9892110402293,2025-09-04_16-22-16,-23.04501059558644,3651948,1756995736,-94.70175302960016,569.8694930076599,266,48.81
+cda-server-2,False,609.4798724651337,"{'sample_time_ms': 41820.453, 'num_steps_trained': 14400, 'grad_time_ms': 365.48, 'default': {'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 1192.4371337890625, 'policy_loss': -0.10855650901794434, 'vf_explained_var': -0.3701235353946686, 'entropy': 15.489436149597168, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1192.3475341796875, 'kl': 0.018641583621501923}, 'load_time_ms': 0.704, 'num_steps_sampled': 14400, 'update_time_ms': 2.628}",12,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.610379457473755,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,14400,14400,{},12,24,-99.9892110402293,2025-09-04_16-22-55,-23.04501059558644,3651948,1756995775,-95.53298387289084,609.4798724651337,290,49.13
+cda-server-2,False,649.2679927349091,"{'sample_time_ms': 39703.982, 'num_steps_trained': 15600, 'grad_time_ms': 365.999, 'default': {'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 1188.027099609375, 'policy_loss': -0.10490735620260239, 'vf_explained_var': -0.46393129229545593, 'entropy': 15.496283531188965, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1187.9417724609375, 'kl': 0.019278578460216522}, 'load_time_ms': 0.698, 'num_steps_sampled': 15600, 'update_time_ms': 2.577}",13,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.78812026977539,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,15600,15600,{},13,24,-99.9892110402293,2025-09-04_16-23-35,-25.85968405258626,3651948,1756995815,-96.20017378990354,649.2679927349091,314,49.45
+cda-server-2,False,689.2937431335449,"{'sample_time_ms': 39617.616, 'num_steps_trained': 16800, 'grad_time_ms': 367.68, 'default': {'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 1119.98095703125, 'policy_loss': -0.1071331575512886, 'vf_explained_var': -0.3960515260696411, 'entropy': 15.486916542053223, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1119.8934326171875, 'kl': 0.019492844119668007}, 'load_time_ms': 0.713, 'num_steps_sampled': 16800, 'update_time_ms': 2.57}",14,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.025750398635864,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,16800,16800,{},14,24,-100.10138569553668,2025-09-04_16-24-15,-38.72316905582058,3651948,1756995855,-97.13363621111745,689.2937431335449,338,49.76
+cda-server-2,False,729.1800971031189,"{'sample_time_ms': 39591.578, 'num_steps_trained': 18000, 'grad_time_ms': 368.351, 'default': {'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 1143.7335205078125, 'policy_loss': -0.10774454474449158, 'vf_explained_var': -0.4819021224975586, 'entropy': 15.48103141784668, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1143.6439208984375, 'kl': 0.017882168292999268}, 'load_time_ms': 0.697, 'num_steps_sampled': 18000, 'update_time_ms': 2.57}",15,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.886353969573975,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,18000,18000,{},15,24,-100.10138569553668,2025-09-04_16-24-55,-92.41579714679654,3651948,1756995895,-97.83491767123333,729.1800971031189,362,50.0
+cda-server-2,False,769.293693780899,"{'sample_time_ms': 39595.882, 'num_steps_trained': 19200, 'grad_time_ms': 365.663, 'default': {'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 1088.394287109375, 'policy_loss': -0.1057095155119896, 'vf_explained_var': -0.46451839804649353, 'entropy': 15.46332836151123, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1088.307373046875, 'kl': 0.01861894316971302}, 'load_time_ms': 0.689, 'num_steps_sampled': 19200, 'update_time_ms': 2.606}",16,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.11359667778015,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,19200,19200,{},16,24,-100.10138569553668,2025-09-04_16-25-35,-92.41579714679654,3651948,1756995935,-97.72628511499211,769.293693780899,386,50.0
+cda-server-2,False,808.9712433815002,"{'sample_time_ms': 39585.098, 'num_steps_trained': 20400, 'grad_time_ms': 367.68, 'default': {'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 1159.6478271484375, 'policy_loss': -0.12469884753227234, 'vf_explained_var': -0.5814424157142639, 'entropy': 15.468914031982422, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1159.5428466796875, 'kl': 0.01942109689116478}, 'load_time_ms': 0.7, 'num_steps_sampled': 20400, 'update_time_ms': 2.561}",17,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.677549600601196,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,20400,20400,{},17,24,-100.10138569553668,2025-09-04_16-26-15,-94.52557691990086,3651948,1756995975,-97.73629758608034,808.9712433815002,410,50.0
+cda-server-2,False,849.0680379867554,"{'sample_time_ms': 39592.423, 'num_steps_trained': 21600, 'grad_time_ms': 369.09, 'default': {'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 1102.488037109375, 'policy_loss': -0.10093361139297485, 'vf_explained_var': -0.46737515926361084, 'entropy': 15.47008228302002, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1102.40625, 'kl': 0.01886645331978798}, 'load_time_ms': 0.73, 'num_steps_sampled': 21600, 'update_time_ms': 2.575}",18,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.09679460525513,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,21600,21600,{},18,25,-99.94909641233812,2025-09-04_16-26-55,-1.137450634299789,3651948,1756996015,-95.90983970514493,849.0680379867554,435,49.19
+cda-server-2,False,888.7057158946991,"{'sample_time_ms': 39476.752, 'num_steps_trained': 22800, 'grad_time_ms': 367.102, 'default': {'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 1188.67138671875, 'policy_loss': -0.11527708917856216, 'vf_explained_var': -0.5554392337799072, 'entropy': 15.453225135803223, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1188.5748291015625, 'kl': 0.018716327846050262}, 'load_time_ms': 0.722, 'num_steps_sampled': 22800, 'update_time_ms': 2.588}",19,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.637677907943726,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,22800,22800,{},19,24,-99.85778078216784,2025-09-04_16-27-35,-1.137450634299789,3651948,1756996055,-95.80412959682307,888.7057158946991,459,49.19
+cda-server-2,False,928.5402855873108,"{'sample_time_ms': 39473.313, 'num_steps_trained': 24000, 'grad_time_ms': 368.399, 'default': {'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 1222.9095458984375, 'policy_loss': -0.1030873954296112, 'vf_explained_var': -0.6650868654251099, 'entropy': 15.46270751953125, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1222.82666015625, 'kl': 0.019915420562028885}, 'load_time_ms': 0.726, 'num_steps_sampled': 24000, 'update_time_ms': 2.605}",20,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.834569692611694,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,24000,24000,{},20,24,-99.85778078216784,2025-09-04_16-28-15,-1.137450634299789,3651948,1756996095,-95.87886228236576,928.5402855873108,483,49.19
+cda-server-2,False,968.6658818721771,"{'sample_time_ms': 39501.469, 'num_steps_trained': 25200, 'grad_time_ms': 370.435, 'default': {'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 1268.17236328125, 'policy_loss': -0.09783076494932175, 'vf_explained_var': -0.7616844177246094, 'entropy': 15.439361572265625, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1268.0931396484375, 'kl': 0.018327785655856133}, 'load_time_ms': 0.733, 'num_steps_sampled': 25200, 'update_time_ms': 2.6}",21,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.12559628486633,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,25200,25200,{},21,24,-99.86032434277038,2025-09-04_16-28-55,-1.137450634299789,3651948,1756996135,-95.82387425761877,968.6658818721771,507,49.19
+cda-server-2,False,1008.8821487426758,"{'sample_time_ms': 39559.169, 'num_steps_trained': 26400, 'grad_time_ms': 373.318, 'default': {'cur_kl_coeff': 1.0125000476837158, 'vf_loss': 1154.8974609375, 'policy_loss': -0.10972815752029419, 'vf_explained_var': -0.6122896671295166, 'entropy': 15.398881912231445, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1154.80810546875, 'kl': 0.02011170983314514}, 'load_time_ms': 0.735, 'num_steps_sampled': 26400, 'update_time_ms': 2.583}",22,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.21626687049866,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,26400,26400,{},22,24,-99.86032434277038,2025-09-04_16-29-35,-90.4005844146529,3651948,1756996175,-97.62980122668787,1008.8821487426758,531,50.0
+cda-server-2,False,1048.678347826004,"{'sample_time_ms': 39560.576, 'num_steps_trained': 27600, 'grad_time_ms': 372.732, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1281.469970703125, 'policy_loss': -0.10534890741109848, 'vf_explained_var': -0.8017933964729309, 'entropy': 15.448863983154297, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1281.388916015625, 'kl': 0.015983637422323227}, 'load_time_ms': 0.726, 'num_steps_sampled': 27600, 'update_time_ms': 2.585}",23,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.79619908332825,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,27600,27600,{},23,24,-99.86032434277038,2025-09-04_16-30-15,-90.4005844146529,3651948,1756996215,-97.49737593284527,1048.678347826004,555,50.0
+cda-server-2,False,1089.2785403728485,"{'sample_time_ms': 39620.043, 'num_steps_trained': 28800, 'grad_time_ms': 370.708, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1191.23193359375, 'policy_loss': -0.09724703431129456, 'vf_explained_var': -0.7418419718742371, 'entropy': 15.407340049743652, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1191.15380859375, 'kl': 0.01257497537881136}, 'load_time_ms': 0.723, 'num_steps_sampled': 28800, 'update_time_ms': 2.588}",24,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.60019254684448,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,28800,28800,{},24,24,-99.98463372714971,2025-09-04_16-30-55,-90.4005844146529,3651948,1756996255,-97.50489288226183,1089.2785403728485,579,50.0
+cda-server-2,False,1129.0712842941284,"{'sample_time_ms': 39613.007, 'num_steps_trained': 30000, 'grad_time_ms': 368.383, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1242.32080078125, 'policy_loss': -0.12498721480369568, 'vf_explained_var': -0.7720822095870972, 'entropy': 15.434539794921875, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1242.2161865234375, 'kl': 0.013309704139828682}, 'load_time_ms': 0.727, 'num_steps_sampled': 30000, 'update_time_ms': 2.583}",25,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.79274392127991,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,30000,30000,{},25,24,-100.10329485311799,2025-09-04_16-31-35,-94.09882496122897,3651948,1756996295,-97.35178240898782,1129.0712842941284,603,50.0
+cda-server-2,False,1168.7032897472382,"{'sample_time_ms': 39562.756, 'num_steps_trained': 31200, 'grad_time_ms': 370.497, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1173.93701171875, 'policy_loss': -0.10411402583122253, 'vf_explained_var': -0.6004043221473694, 'entropy': 15.364545822143555, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1173.8564453125, 'kl': 0.01551245991140604}, 'load_time_ms': 0.725, 'num_steps_sampled': 31200, 'update_time_ms': 2.535}",26,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.63200545310974,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,31200,31200,{},26,24,-100.10329485311799,2025-09-04_16-32-15,-57.801233031301635,3651948,1756996335,-97.0011269918407,1168.7032897472382,627,49.86
+cda-server-2,False,1208.4105989933014,"{'sample_time_ms': 39565.133, 'num_steps_trained': 32400, 'grad_time_ms': 371.029, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1281.2742919921875, 'policy_loss': -0.10503542423248291, 'vf_explained_var': -0.700732946395874, 'entropy': 15.384541511535645, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1281.1903076171875, 'kl': 0.013778585940599442}, 'load_time_ms': 0.749, 'num_steps_sampled': 32400, 'update_time_ms': 2.55}",27,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.70730924606323,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,32400,32400,{},27,24,-100.10329485311799,2025-09-04_16-32-55,-57.801233031301635,3651948,1756996375,-97.03697085146841,1208.4105989933014,651,49.86
+cda-server-2,False,1248.9155259132385,"{'sample_time_ms': 39606.656, 'num_steps_trained': 33600, 'grad_time_ms': 370.364, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1321.213623046875, 'policy_loss': -0.10575778782367706, 'vf_explained_var': -0.8148228526115417, 'entropy': 15.369461059570312, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1321.1295166015625, 'kl': 0.014238353818655014}, 'load_time_ms': 0.72, 'num_steps_sampled': 33600, 'update_time_ms': 2.546}",28,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.504926919937134,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,33600,33600,{},28,24,-100.10329485311799,2025-09-04_16-33-35,-57.801233031301635,3651948,1756996415,-97.03483582868591,1248.9155259132385,675,49.86
+cda-server-2,False,1288.9718182086945,"{'sample_time_ms': 39647.71, 'num_steps_trained': 34800, 'grad_time_ms': 371.231, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1341.10400390625, 'policy_loss': -0.10665473341941833, 'vf_explained_var': -0.8043767213821411, 'entropy': 15.401509284973145, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1341.019775390625, 'kl': 0.014626596122980118}, 'load_time_ms': 0.72, 'num_steps_sampled': 34800, 'update_time_ms': 2.513}",29,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.05629229545593,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,34800,34800,{},29,24,-99.95508627430446,2025-09-04_16-34-15,-57.801233031301635,3651948,1756996455,-97.0966372787425,1288.9718182086945,699,49.86
+cda-server-2,False,1328.9700276851654,"{'sample_time_ms': 39662.755, 'num_steps_trained': 36000, 'grad_time_ms': 372.537, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1277.83251953125, 'policy_loss': -0.10264497995376587, 'vf_explained_var': -0.7749524712562561, 'entropy': 15.299591064453125, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1277.75146484375, 'kl': 0.014280046336352825}, 'load_time_ms': 0.715, 'num_steps_sampled': 36000, 'update_time_ms': 2.514}",30,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.99820947647095,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,36000,36000,{},30,25,-99.99093155757775,2025-09-04_16-34-55,0.0015531449246815043,3651948,1756996495,-96.37514610728846,1328.9700276851654,724,49.56
+cda-server-2,False,1369.1333026885986,"{'sample_time_ms': 39665.146, 'num_steps_trained': 37200, 'grad_time_ms': 373.787, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1310.75341796875, 'policy_loss': -0.10099545121192932, 'vf_explained_var': -0.7382559180259705, 'entropy': 15.32412052154541, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1310.672119140625, 'kl': 0.012976918369531631}, 'load_time_ms': 0.725, 'num_steps_sampled': 37200, 'update_time_ms': 2.568}",31,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.16327500343323,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,37200,37200,{},31,25,-99.99093155757775,2025-09-04_16-35-36,0.0015531449246815043,3651948,1756996536,-95.63129160278804,1369.1333026885986,749,49.21
+cda-server-2,False,1408.7570397853851,"{'sample_time_ms': 39608.775, 'num_steps_trained': 38400, 'grad_time_ms': 370.909, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1223.63525390625, 'policy_loss': -0.12073878198862076, 'vf_explained_var': -0.7618313431739807, 'entropy': 15.325020790100098, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1223.53466796875, 'kl': 0.01335633173584938}, 'load_time_ms': 0.731, 'num_steps_sampled': 38400, 'update_time_ms': 2.589}",32,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.6237370967865,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,38400,38400,{},32,24,-99.99093155757775,2025-09-04_16-36-15,0.0015531449246815043,3651948,1756996575,-95.55989689348331,1408.7570397853851,773,49.21
+cda-server-2,False,1448.3962044715881,"{'sample_time_ms': 39589.868, 'num_steps_trained': 39600, 'grad_time_ms': 374.091, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1231.0386962890625, 'policy_loss': -0.11493682861328125, 'vf_explained_var': -0.7585346698760986, 'entropy': 15.28339672088623, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1230.9471435546875, 'kl': 0.015361123718321323}, 'load_time_ms': 0.733, 'num_steps_sampled': 39600, 'update_time_ms': 2.56}",33,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.639164686203,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,39600,39600,{},33,24,-99.99093155757775,2025-09-04_16-36-55,0.0015531449246815043,3651948,1756996615,-95.615816928204,1448.3962044715881,797,49.21
+cda-server-2,False,1488.3905136585236,"{'sample_time_ms': 39528.843, 'num_steps_trained': 40800, 'grad_time_ms': 374.478, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1203.33447265625, 'policy_loss': -0.10448554903268814, 'vf_explained_var': -0.7520565390586853, 'entropy': 15.311238288879395, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1203.2515869140625, 'kl': 0.014262043870985508}, 'load_time_ms': 0.723, 'num_steps_sampled': 40800, 'update_time_ms': 2.637}",34,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.994309186935425,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,40800,40800,{},34,24,-99.73257844882728,2025-09-04_16-37-35,-16.290060169030422,3651948,1756996655,-96.62528324751145,1488.3905136585236,821,49.65
+cda-server-2,False,1528.3656723499298,"{'sample_time_ms': 39545.009, 'num_steps_trained': 42000, 'grad_time_ms': 376.545, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1227.427978515625, 'policy_loss': -0.11075553297996521, 'vf_explained_var': -0.801076352596283, 'entropy': 15.274660110473633, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1227.34033203125, 'kl': 0.015269107185304165}, 'load_time_ms': 0.725, 'num_steps_sampled': 42000, 'update_time_ms': 2.623}",35,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.97515869140625,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,42000,42000,{},35,24,-99.73257844882728,2025-09-04_16-38-15,-77.82174753169423,3651948,1756996695,-97.0927170250508,1528.3656723499298,845,49.95
+cda-server-2,False,1568.491044998169,"{'sample_time_ms': 39596.393, 'num_steps_trained': 43200, 'grad_time_ms': 374.488, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1230.39404296875, 'policy_loss': -0.1102805882692337, 'vf_explained_var': -0.7730542421340942, 'entropy': 15.220480918884277, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1230.304931640625, 'kl': 0.013823870569467545}, 'load_time_ms': 0.728, 'num_steps_sampled': 43200, 'update_time_ms': 2.645}",36,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.125372648239136,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,43200,43200,{},36,24,-99.99007305617893,2025-09-04_16-38-55,-77.82174753169423,3651948,1756996735,-97.28325003911186,1568.491044998169,869,49.95
+cda-server-2,False,1608.1779806613922,"{'sample_time_ms': 39594.501, 'num_steps_trained': 44400, 'grad_time_ms': 374.389, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1233.9117431640625, 'policy_loss': -0.1051551029086113, 'vf_explained_var': -0.6987488865852356, 'entropy': 15.250106811523438, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1233.82861328125, 'kl': 0.014496508985757828}, 'load_time_ms': 0.694, 'num_steps_sampled': 44400, 'update_time_ms': 2.627}",37,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.68693566322327,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,44400,44400,{},37,24,-99.99007305617893,2025-09-04_16-39-35,-77.82174753169423,3651948,1756996775,-97.21012824430167,1608.1779806613922,893,49.95
+cda-server-2,False,1648.4379494190216,"{'sample_time_ms': 39570.653, 'num_steps_trained': 45600, 'grad_time_ms': 373.73, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1277.79052734375, 'policy_loss': -0.10825362056493759, 'vf_explained_var': -0.7801445126533508, 'entropy': 15.28171157836914, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1277.70361328125, 'kl': 0.013901184312999249}, 'load_time_ms': 0.694, 'num_steps_sampled': 45600, 'update_time_ms': 2.637}",38,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.259968757629395,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,45600,45600,{},38,24,-99.99007305617893,2025-09-04_16-40-15,-77.82174753169423,3651948,1756996815,-97.04979273483048,1648.4379494190216,917,49.95
+cda-server-2,False,1688.43723654747,"{'sample_time_ms': 39563.71, 'num_steps_trained': 46800, 'grad_time_ms': 374.899, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1167.91552734375, 'policy_loss': -0.10696208477020264, 'vf_explained_var': -0.7553014755249023, 'entropy': 15.204646110534668, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1167.8319091796875, 'kl': 0.015371869318187237}, 'load_time_ms': 0.703, 'num_steps_sampled': 46800, 'update_time_ms': 2.655}",39,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.999287128448486,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,46800,46800,{},39,25,-99.99007305617893,2025-09-04_16-40-55,-15.99934133821527,3651948,1756996855,-96.36363020491022,1688.43723654747,942,49.59
+cda-server-2,False,1729.0509288311005,"{'sample_time_ms': 39625.919, 'num_steps_trained': 48000, 'grad_time_ms': 374.175, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1148.047119140625, 'policy_loss': -0.10330415517091751, 'vf_explained_var': -0.7272942662239075, 'entropy': 15.175899505615234, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1147.966552734375, 'kl': 0.015090687200427055}, 'load_time_ms': 0.721, 'num_steps_sampled': 48000, 'update_time_ms': 2.669}",40,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.61369228363037,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,48000,48000,{},40,24,-99.95344412932664,2025-09-04_16-41-36,-15.99934133821527,3651948,1756996896,-96.24264843934478,1729.0509288311005,966,49.59
+cda-server-2,False,1769.8827843666077,"{'sample_time_ms': 39694.499, 'num_steps_trained': 49200, 'grad_time_ms': 372.5, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1252.537109375, 'policy_loss': -0.11457589268684387, 'vf_explained_var': -0.7772528529167175, 'entropy': 15.200519561767578, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1252.4432373046875, 'kl': 0.013611800968647003}, 'load_time_ms': 0.725, 'num_steps_sampled': 49200, 'update_time_ms': 2.658}",41,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.8318555355072,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,49200,49200,{},41,24,-99.95344412932664,2025-09-04_16-42-17,-15.99934133821527,3651948,1756996937,-96.22698868572897,1769.8827843666077,990,49.59
+cda-server-2,False,1809.5630688667297,"{'sample_time_ms': 39697.47, 'num_steps_trained': 50400, 'grad_time_ms': 375.234, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1227.67529296875, 'policy_loss': -0.10887836664915085, 'vf_explained_var': -0.778679609298706, 'entropy': 15.25713062286377, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1227.587158203125, 'kl': 0.013566892594099045}, 'load_time_ms': 0.718, 'num_steps_sampled': 50400, 'update_time_ms': 2.617}",42,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.68028450012207,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,50400,50400,{},42,24,-99.95344412932664,2025-09-04_16-42-56,-15.99934133821527,3651948,1756996976,-96.22189939411399,1809.5630688667297,1014,49.59
+cda-server-2,False,1849.4926145076752,"{'sample_time_ms': 39729.576, 'num_steps_trained': 51600, 'grad_time_ms': 372.205, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1097.594970703125, 'policy_loss': -0.11680027842521667, 'vf_explained_var': -0.6031178832054138, 'entropy': 15.132685661315918, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1097.4976806640625, 'kl': 0.012929944321513176}, 'load_time_ms': 0.72, 'num_steps_sampled': 51600, 'update_time_ms': 2.628}",43,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.929545640945435,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,51600,51600,{},43,24,-99.84089460076768,2025-09-04_16-43-36,-83.19525614553856,3651948,1756997016,-97.00053722716505,1849.4926145076752,1038,49.98
+cda-server-2,False,1889.2982964515686,"{'sample_time_ms': 39710.673, 'num_steps_trained': 52800, 'grad_time_ms': 372.29, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1327.10302734375, 'policy_loss': -0.11665192991495132, 'vf_explained_var': -0.7370307445526123, 'entropy': 15.16311264038086, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1327.0091552734375, 'kl': 0.014845062047243118}, 'load_time_ms': 0.718, 'num_steps_sampled': 52800, 'update_time_ms': 2.547}",44,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.80568194389343,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,52800,52800,{},44,24,-99.84089460076768,2025-09-04_16-44-16,-47.30665988731469,3651948,1756997056,-96.36052394042983,1889.2982964515686,1062,49.79
+cda-server-2,False,1929.0171658992767,"{'sample_time_ms': 39686.403, 'num_steps_trained': 54000, 'grad_time_ms': 370.849, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1300.06591796875, 'policy_loss': -0.11423023790121078, 'vf_explained_var': -0.7665535807609558, 'entropy': 15.056652069091797, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1299.973388671875, 'kl': 0.014357775449752808}, 'load_time_ms': 0.715, 'num_steps_sampled': 54000, 'update_time_ms': 2.623}",45,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.71886944770813,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,54000,54000,{},45,24,-99.84089460076768,2025-09-04_16-44-56,-47.30665988731469,3651948,1756997096,-96.35048571213896,1929.0171658992767,1086,49.79
+cda-server-2,False,1968.8651938438416,"{'sample_time_ms': 39658.494, 'num_steps_trained': 55200, 'grad_time_ms': 370.962, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1358.3763427734375, 'policy_loss': -0.1065993681550026, 'vf_explained_var': -0.79640793800354, 'entropy': 15.09638500213623, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1358.2899169921875, 'kl': 0.013298786245286465}, 'load_time_ms': 0.713, 'num_steps_sampled': 55200, 'update_time_ms': 2.665}",46,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.84802794456482,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,55200,55200,{},46,24,-99.84089460076768,2025-09-04_16-45-36,-47.30665988731469,3651948,1756997136,-96.46923531968903,1968.8651938438416,1110,49.79
+cda-server-2,False,2010.0142283439636,"{'sample_time_ms': 39805.123, 'num_steps_trained': 56400, 'grad_time_ms': 370.536, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1360.581298828125, 'policy_loss': -0.11323577910661697, 'vf_explained_var': -0.7683766484260559, 'entropy': 15.034567832946777, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1360.4910888671875, 'kl': 0.015101809985935688}, 'load_time_ms': 0.717, 'num_steps_sampled': 56400, 'update_time_ms': 2.664}",47,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.14903450012207,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,56400,56400,{},47,24,-99.70491179654027,2025-09-04_16-46-17,-47.30665988731469,3651948,1756997177,-96.50030034668707,2010.0142283439636,1134,49.79
+cda-server-2,False,2050.1419506073,"{'sample_time_ms': 39793.815, 'num_steps_trained': 57600, 'grad_time_ms': 368.623, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1230.81640625, 'policy_loss': -0.11603689193725586, 'vf_explained_var': -0.7617323994636536, 'entropy': 14.98969554901123, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1230.7230224609375, 'kl': 0.014867722988128662}, 'load_time_ms': 0.721, 'num_steps_sampled': 57600, 'update_time_ms': 2.658}",48,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.12772226333618,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,57600,57600,{},48,25,-99.70491179654027,2025-09-04_16-46-57,-50.47800847607699,3651948,1756997217,-96.78667045656734,2050.1419506073,1159,49.83
+cda-server-2,False,2090.207808494568,"{'sample_time_ms': 39800.839, 'num_steps_trained': 58800, 'grad_time_ms': 368.33, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1210.0145263671875, 'policy_loss': -0.1278069019317627, 'vf_explained_var': -0.7858371138572693, 'entropy': 14.963143348693848, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1209.9093017578125, 'kl': 0.014916043728590012}, 'load_time_ms': 0.716, 'num_steps_sampled': 58800, 'update_time_ms': 2.632}",49,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.065857887268066,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,58800,58800,{},49,24,-99.69965493226601,2025-09-04_16-47-37,-36.92857428593311,3651948,1756997257,-96.00154953185834,2090.207808494568,1183,49.58
+cda-server-2,False,2130.0400941371918,"{'sample_time_ms': 39722.489, 'num_steps_trained': 60000, 'grad_time_ms': 368.589, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1181.7391357421875, 'policy_loss': -0.11024336516857147, 'vf_explained_var': -0.7595869302749634, 'entropy': 14.997981071472168, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1181.6502685546875, 'kl': 0.014109021984040737}, 'load_time_ms': 0.704, 'num_steps_sampled': 60000, 'update_time_ms': 2.607}",50,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.8322856426239,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,60000,60000,{},50,26,-99.69965493226601,2025-09-04_16-48-17,2.000894818521134,3651948,1756997297,-94.21600584758427,2130.0400941371918,1209,48.88
+cda-server-2,False,2169.8263907432556,"{'sample_time_ms': 39616.721, 'num_steps_trained': 61200, 'grad_time_ms': 369.792, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1206.790771484375, 'policy_loss': -0.10038409382104874, 'vf_explained_var': -0.7725622653961182, 'entropy': 14.993680953979492, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1206.7130126953125, 'kl': 0.014860378578305244}, 'load_time_ms': 0.699, 'num_steps_sampled': 61200, 'update_time_ms': 2.575}",51,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.78629660606384,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,61200,61200,{},51,24,-99.69965493226601,2025-09-04_16-48-57,2.000894818521134,3651948,1756997337,-94.16899019027835,2169.8263907432556,1233,48.88
+cda-server-2,False,2210.4999437332153,"{'sample_time_ms': 39718.94, 'num_steps_trained': 62400, 'grad_time_ms': 366.862, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1131.96240234375, 'policy_loss': -0.11758121848106384, 'vf_explained_var': -0.6979755163192749, 'entropy': 14.95267105102539, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1131.8658447265625, 'kl': 0.013862605206668377}, 'load_time_ms': 0.691, 'num_steps_sampled': 62400, 'update_time_ms': 2.612}",52,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.67355298995972,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,62400,62400,{},52,26,-99.69965493226601,2025-09-04_16-49-37,8.000000567682516,3651948,1756997377,-92.0585035779024,2210.4999437332153,1259,47.93
+cda-server-2,False,2250.7435400485992,"{'sample_time_ms': 39748.417, 'num_steps_trained': 63600, 'grad_time_ms': 368.77, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1224.529541015625, 'policy_loss': -0.09991131722927094, 'vf_explained_var': -0.7694526314735413, 'entropy': 14.912704467773438, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1224.4503173828125, 'kl': 0.013661215081810951}, 'load_time_ms': 0.691, 'num_steps_sampled': 63600, 'update_time_ms': 2.613}",53,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.24359631538391,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,63600,63600,{},53,25,-99.68827816877031,2025-09-04_16-50-18,8.000000567682516,3651948,1756997418,-91.20529984729008,2250.7435400485992,1284,47.52
+cda-server-2,False,2290.6784195899963,"{'sample_time_ms': 39758.858, 'num_steps_trained': 64800, 'grad_time_ms': 371.236, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1310.67138671875, 'policy_loss': -0.10968722403049469, 'vf_explained_var': -0.8816094398498535, 'entropy': 15.012337684631348, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1310.582763671875, 'kl': 0.013880123384296894}, 'load_time_ms': 0.688, 'num_steps_sampled': 64800, 'update_time_ms': 2.624}",54,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.934879541397095,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,64800,64800,{},54,24,-99.68827816877031,2025-09-04_16-50-58,8.000000567682516,3651948,1756997458,-92.94233478779394,2290.6784195899963,1308,48.22
+cda-server-2,False,2330.734453201294,"{'sample_time_ms': 39790.737, 'num_steps_trained': 66000, 'grad_time_ms': 373.097, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1422.956787109375, 'policy_loss': -0.12223473936319351, 'vf_explained_var': -0.8387157320976257, 'entropy': 15.144838333129883, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1422.8563232421875, 'kl': 0.01441657543182373}, 'load_time_ms': 0.702, 'num_steps_sampled': 66000, 'update_time_ms': 2.597}",55,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.05603361129761,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,66000,66000,{},55,25,-99.68827816877031,2025-09-04_16-51-38,8.000000567682516,3651948,1756997498,-91.4661865953845,2330.734453201294,1333,47.67
+cda-server-2,False,2370.54008436203,"{'sample_time_ms': 39784.586, 'num_steps_trained': 67200, 'grad_time_ms': 375.042, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1342.798828125, 'policy_loss': -0.11646595597267151, 'vf_explained_var': -0.7877098917961121, 'entropy': 15.047779083251953, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1342.707275390625, 'kl': 0.016279883682727814}, 'load_time_ms': 0.714, 'num_steps_sampled': 67200, 'update_time_ms': 2.59}",56,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.805631160736084,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,67200,67200,{},56,24,-99.22056810823626,2025-09-04_16-52-17,1.1405470155882025,3651948,1756997537,-93.87697515324817,2370.54008436203,1357,48.79
+cda-server-2,False,2410.359657764435,"{'sample_time_ms': 39652.665, 'num_steps_trained': 68400, 'grad_time_ms': 373.994, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1294.26953125, 'policy_loss': -0.1283871829509735, 'vf_explained_var': -0.7179339528083801, 'entropy': 14.984747886657715, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1294.1630859375, 'kl': 0.014505099505186081}, 'load_time_ms': 0.709, 'num_steps_sampled': 68400, 'update_time_ms': 2.615}",57,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.819573402404785,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,68400,68400,{},57,24,-99.30733801768991,2025-09-04_16-52-57,1.1405470155882025,3651948,1756997577,-94.32058205387851,2410.359657764435,1381,49.01
+cda-server-2,False,2451.774926185608,"{'sample_time_ms': 39781.232, 'num_steps_trained': 69600, 'grad_time_ms': 374.189, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1268.938720703125, 'policy_loss': -0.11768833547830582, 'vf_explained_var': -0.7330797910690308, 'entropy': 14.87173080444336, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1268.8406982421875, 'kl': 0.01305652316659689}, 'load_time_ms': 0.7, 'num_steps_sampled': 69600, 'update_time_ms': 2.616}",58,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.415268421173096,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,69600,69600,{},58,24,-99.30733801768991,2025-09-04_16-53-39,-3.6157548869232627,3651948,1756997619,-95.15196150291067,2451.774926185608,1405,49.44
+cda-server-2,False,2492.544373989105,"{'sample_time_ms': 39851.185, 'num_steps_trained': 70800, 'grad_time_ms': 374.531, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1244.011474609375, 'policy_loss': -0.11157584190368652, 'vf_explained_var': -0.7300561666488647, 'entropy': 14.927780151367188, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1243.9197998046875, 'kl': 0.013102485798299313}, 'load_time_ms': 0.699, 'num_steps_sampled': 70800, 'update_time_ms': 2.636}",59,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.769447803497314,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,70800,70800,{},59,25,-99.30733801768991,2025-09-04_16-54-19,-3.6157548869232627,3651948,1756997659,-94.71678434113755,2492.544373989105,1430,49.24
+cda-server-2,False,2532.3875205516815,"{'sample_time_ms': 39852.211, 'num_steps_trained': 72000, 'grad_time_ms': 374.575, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1167.0535888671875, 'policy_loss': -0.11288302391767502, 'vf_explained_var': -0.7880843281745911, 'entropy': 14.89885139465332, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1166.9609375, 'kl': 0.013332750648260117}, 'load_time_ms': 0.706, 'num_steps_sampled': 72000, 'update_time_ms': 2.64}",60,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.843146562576294,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,72000,72000,{},60,25,-99.60113338733126,2025-09-04_16-54-59,-43.26172837301939,3651948,1756997699,-95.24715711171521,2532.3875205516815,1455,49.54
+cda-server-2,False,2572.3899228572845,"{'sample_time_ms': 39874.653, 'num_steps_trained': 73200, 'grad_time_ms': 373.813, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1214.6409912109375, 'policy_loss': -0.11584869027137756, 'vf_explained_var': -0.7653178572654724, 'entropy': 14.912324905395508, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1214.54638671875, 'kl': 0.014048927463591099}, 'load_time_ms': 0.691, 'num_steps_sampled': 73200, 'update_time_ms': 2.639}",61,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.00240230560303,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,73200,73200,{},61,24,-99.60113338733126,2025-09-04_16-55-39,-39.9027328754405,3651948,1756997739,-94.50576187376137,2572.3899228572845,1479,49.3
+cda-server-2,False,2612.1271228790283,"{'sample_time_ms': 39778.975, 'num_steps_trained': 74400, 'grad_time_ms': 375.838, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1259.583984375, 'policy_loss': -0.11178527772426605, 'vf_explained_var': -0.7795595526695251, 'entropy': 14.82375431060791, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1259.4942626953125, 'kl': 0.014546235091984272}, 'load_time_ms': 0.695, 'num_steps_sampled': 74400, 'update_time_ms': 2.646}",62,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.737200021743774,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,74400,74400,{},62,25,-99.97254170911407,2025-09-04_16-56-19,2.1591405978752833,3651948,1756997779,-93.35622147695085,2612.1271228790283,1504,48.81
+cda-server-2,False,2652.1977066993713,"{'sample_time_ms': 39763.604, 'num_steps_trained': 75600, 'grad_time_ms': 373.908, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1356.56787109375, 'policy_loss': -0.10862504690885544, 'vf_explained_var': -0.7592952847480774, 'entropy': 14.951154708862305, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1356.4798583984375, 'kl': 0.013558438047766685}, 'load_time_ms': 0.694, 'num_steps_sampled': 75600, 'update_time_ms': 2.647}",63,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.07058382034302,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,75600,75600,{},63,25,-99.97254170911407,2025-09-04_16-56-59,6.000001326755738,3651948,1756997819,-92.5705083925217,2652.1977066993713,1529,48.41
+cda-server-2,False,2692.412809610367,"{'sample_time_ms': 39792.826, 'num_steps_trained': 76800, 'grad_time_ms': 372.736, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1189.119873046875, 'policy_loss': -0.12759803235530853, 'vf_explained_var': -0.7264623045921326, 'entropy': 14.669437408447266, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1189.014404296875, 'kl': 0.01455807313323021}, 'load_time_ms': 0.697, 'num_steps_sampled': 76800, 'update_time_ms': 2.635}",64,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.21510291099548,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,76800,76800,{},64,24,-99.97254170911407,2025-09-04_16-57-40,6.000001326755738,3651948,1756997860,-92.60180417143867,2692.412809610367,1553,48.39
+cda-server-2,False,2732.490079641342,"{'sample_time_ms': 39797.621, 'num_steps_trained': 78000, 'grad_time_ms': 370.136, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1244.51416015625, 'policy_loss': -0.11783421039581299, 'vf_explained_var': -0.7883577942848206, 'entropy': 14.744547843933105, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1244.4168701171875, 'kl': 0.013561917468905449}, 'load_time_ms': 0.677, 'num_steps_sampled': 78000, 'update_time_ms': 2.622}",65,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.07727003097534,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,78000,78000,{},65,25,-99.97254170911407,2025-09-04_16-58-20,6.000001326755738,3651948,1756997900,-92.77911973728547,2732.490079641342,1578,48.36
+cda-server-2,False,2772.6015956401825,"{'sample_time_ms': 39828.28, 'num_steps_trained': 79200, 'grad_time_ms': 370.099, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1261.6090087890625, 'policy_loss': -0.11495360732078552, 'vf_explained_var': -0.7529252171516418, 'entropy': 14.982555389404297, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1261.513427734375, 'kl': 0.012708180584013462}, 'load_time_ms': 0.676, 'num_steps_sampled': 79200, 'update_time_ms': 2.561}",66,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.11151599884033,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,79200,79200,{},66,25,-99.72677078360388,2025-09-04_16-59-00,6.000001326755738,3651948,1756997940,-93.05071050827317,2772.6015956401825,1603,48.6
+cda-server-2,False,2812.351597547531,"{'sample_time_ms': 39820.574, 'num_steps_trained': 80400, 'grad_time_ms': 370.846, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1198.54150390625, 'policy_loss': -0.12078271806240082, 'vf_explained_var': -0.8323256969451904, 'entropy': 14.662151336669922, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1198.443603515625, 'kl': 0.014926041476428509}, 'load_time_ms': 0.682, 'num_steps_sampled': 80400, 'update_time_ms': 2.553}",67,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.75000190734863,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,80400,80400,{},67,25,-99.72677078360388,2025-09-04_16-59-39,-33.96508927336994,3651948,1756997979,-94.36946534877416,2812.351597547531,1628,49.14
+cda-server-2,False,2852.031061410904,"{'sample_time_ms': 39645.242, 'num_steps_trained': 81600, 'grad_time_ms': 372.573, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1221.2003173828125, 'policy_loss': -0.10780903697013855, 'vf_explained_var': -0.8038766384124756, 'entropy': 14.78492546081543, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1221.1131591796875, 'kl': 0.013653003610670567}, 'load_time_ms': 0.686, 'num_steps_sampled': 81600, 'update_time_ms': 2.537}",68,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.6794638633728,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,81600,81600,{},68,24,-99.58787226122642,2025-09-04_17-00-19,-25.2159638771289,3651948,1756998019,-94.04441640538226,2852.031061410904,1652,48.97
+cda-server-2,False,2891.8031606674194,"{'sample_time_ms': 39547.375, 'num_steps_trained': 82800, 'grad_time_ms': 370.732, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1223.3162841796875, 'policy_loss': -0.11841960996389389, 'vf_explained_var': -0.8032306432723999, 'entropy': 14.663142204284668, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1223.2208251953125, 'kl': 0.015099359676241875}, 'load_time_ms': 0.685, 'num_steps_sampled': 82800, 'update_time_ms': 2.526}",69,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.7720992565155,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,82800,82800,{},69,25,-99.99162556002155,2025-09-04_17-00-59,-0.7702540579181019,3651948,1756998059,-93.08991260626364,2891.8031606674194,1677,48.65
+cda-server-2,False,2931.6492550373077,"{'sample_time_ms': 39548.229, 'num_steps_trained': 84000, 'grad_time_ms': 370.219, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1172.17431640625, 'policy_loss': -0.1396070122718811, 'vf_explained_var': -0.7034938335418701, 'entropy': 14.607905387878418, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1172.0589599609375, 'kl': 0.015890225768089294}, 'load_time_ms': 0.671, 'num_steps_sampled': 84000, 'update_time_ms': 2.544}",70,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.846094369888306,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,84000,84000,{},70,24,-99.99162556002155,2025-09-04_17-01-39,-0.7702540579181019,3651948,1756998099,-93.36784562070854,2931.6492550373077,1701,48.68
+cda-server-2,False,2971.3676204681396,"{'sample_time_ms': 39519.912, 'num_steps_trained': 85200, 'grad_time_ms': 370.109, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1292.23876953125, 'policy_loss': -0.1328306645154953, 'vf_explained_var': -0.7532870769500732, 'entropy': 14.943361282348633, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1292.128173828125, 'kl': 0.014537609182298183}, 'load_time_ms': 0.674, 'num_steps_sampled': 85200, 'update_time_ms': 2.531}",71,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.71836543083191,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,85200,85200,{},71,24,-99.99162556002155,2025-09-04_17-02-19,-0.7702540579181019,3651948,1756998139,-92.87745475202908,2971.3676204681396,1725,48.64
+cda-server-2,False,3011.002952814102,"{'sample_time_ms': 39510.118, 'num_steps_trained': 86400, 'grad_time_ms': 369.717, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1240.18310546875, 'policy_loss': -0.11488083750009537, 'vf_explained_var': -0.6741650104522705, 'entropy': 14.941850662231445, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1240.0899658203125, 'kl': 0.014264964498579502}, 'load_time_ms': 0.671, 'num_steps_sampled': 86400, 'update_time_ms': 2.534}",72,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.635332345962524,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,86400,86400,{},72,27,-99.99162556002155,2025-09-04_17-02-58,-0.7702540579181019,3651948,1756998178,-91.20535606157559,3011.002952814102,1752,48.02
+cda-server-2,False,3050.5706675052643,"{'sample_time_ms': 39458.188, 'num_steps_trained': 87600, 'grad_time_ms': 371.345, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1113.804443359375, 'policy_loss': -0.11882533133029938, 'vf_explained_var': -0.7840087413787842, 'entropy': 14.758036613464355, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1113.7073974609375, 'kl': 0.014246370643377304}, 'load_time_ms': 0.665, 'num_steps_sampled': 87600, 'update_time_ms': 2.549}",73,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.56771469116211,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,87600,87600,{},73,24,-99.6792457992007,2025-09-04_17-03-38,-15.576939134117044,3651948,1756998218,-92.18438998251895,3050.5706675052643,1776,48.52
+cda-server-2,False,3091.4167096614838,"{'sample_time_ms': 39521.164, 'num_steps_trained': 88800, 'grad_time_ms': 371.391, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1172.51708984375, 'policy_loss': -0.12917476892471313, 'vf_explained_var': -0.7146407961845398, 'entropy': 14.7467041015625, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1172.4105224609375, 'kl': 0.014921224676072598}, 'load_time_ms': 0.674, 'num_steps_sampled': 88800, 'update_time_ms': 2.571}",74,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.84604215621948,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,88800,88800,{},74,25,-99.6792457992007,2025-09-04_17-04-19,-3.999500710162776,3651948,1756998259,-91.32126179142608,3091.4167096614838,1801,48.21
+cda-server-2,False,3131.535984277725,"{'sample_time_ms': 39524.336, 'num_steps_trained': 90000, 'grad_time_ms': 372.342, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1171.7374267578125, 'policy_loss': -0.11464173346757889, 'vf_explained_var': -0.7707966566085815, 'entropy': 14.599848747253418, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1171.64208984375, 'kl': 0.012648598290979862}, 'load_time_ms': 0.679, 'num_steps_sampled': 90000, 'update_time_ms': 2.587}",75,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.119274616241455,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,90000,90000,{},75,24,-99.63079658416484,2025-09-04_17-04-59,-3.999500710162776,3651948,1756998299,-92.15717888342807,3131.535984277725,1825,48.44
+cda-server-2,False,3171.6466183662415,"{'sample_time_ms': 39524.771, 'num_steps_trained': 91200, 'grad_time_ms': 371.805, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1277.7818603515625, 'policy_loss': -0.11734982579946518, 'vf_explained_var': -0.7334659099578857, 'entropy': 14.809412956237793, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1277.6856689453125, 'kl': 0.01380773726850748}, 'load_time_ms': 0.674, 'num_steps_sampled': 91200, 'update_time_ms': 2.622}",76,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.110634088516235,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,91200,91200,{},76,24,-99.63079658416484,2025-09-04_17-05-39,-3.999500710162776,3651948,1756998339,-92.27201614128585,3171.6466183662415,1849,48.75
+cda-server-2,False,3211.787467956543,"{'sample_time_ms': 39563.643, 'num_steps_trained': 92400, 'grad_time_ms': 372.018, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1321.55908203125, 'policy_loss': -0.12810860574245453, 'vf_explained_var': -0.7949018478393555, 'entropy': 14.77613353729248, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1321.45458984375, 'kl': 0.015502896159887314}, 'load_time_ms': 0.678, 'num_steps_sampled': 92400, 'update_time_ms': 2.623}",77,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.140849590301514,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,92400,92400,{},77,25,-98.73718296318454,2025-09-04_17-06-19,-3.999500710162776,3651948,1756998379,-92.93486778479472,3211.787467956543,1874,49.01
+cda-server-2,False,3252.4975650310516,"{'sample_time_ms': 39667.332, 'num_steps_trained': 93600, 'grad_time_ms': 371.34, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1272.25634765625, 'policy_loss': -0.12087935954332352, 'vf_explained_var': -0.7115300297737122, 'entropy': 14.752005577087402, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1272.1575927734375, 'kl': 0.014506997540593147}, 'load_time_ms': 0.675, 'num_steps_sampled': 93600, 'update_time_ms': 2.629}",78,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.71009707450867,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,93600,93600,{},78,24,-98.6604206333207,2025-09-04_17-07-00,-55.892082802026835,3651948,1756998420,-94.11221293511282,3252.4975650310516,1898,49.55
+cda-server-2,False,3293.6832132339478,"{'sample_time_ms': 39806.354, 'num_steps_trained': 94800, 'grad_time_ms': 373.642, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1292.762939453125, 'policy_loss': -0.11956813931465149, 'vf_explained_var': -0.7240657806396484, 'entropy': 14.862645149230957, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1292.663818359375, 'kl': 0.013549041002988815}, 'load_time_ms': 0.671, 'num_steps_sampled': 94800, 'update_time_ms': 2.652}",79,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.18564820289612,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,94800,94800,{},79,24,-98.6604206333207,2025-09-04_17-07-41,-55.892082802026835,3651948,1756998461,-93.72617925382933,3293.6832132339478,1922,49.53
+cda-server-2,False,3333.982837200165,"{'sample_time_ms': 39853.736, 'num_steps_trained': 96000, 'grad_time_ms': 371.603, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1276.9134521484375, 'policy_loss': -0.11261190474033356, 'vf_explained_var': -0.7686378955841064, 'entropy': 15.086308479309082, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1276.8209228515625, 'kl': 0.013302515260875225}, 'load_time_ms': 0.668, 'num_steps_sampled': 96000, 'update_time_ms': 2.649}",80,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.29962396621704,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,96000,96000,{},80,26,-98.6604206333207,2025-09-04_17-08-21,-8.733419482830186,3651948,1756998501,-93.15560433947206,3333.982837200165,1948,49.12
+cda-server-2,False,3374.2282209396362,"{'sample_time_ms': 39908.36, 'num_steps_trained': 97200, 'grad_time_ms': 369.724, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1151.8021240234375, 'policy_loss': -0.11964704096317291, 'vf_explained_var': -0.6800518035888672, 'entropy': 14.558051109313965, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1151.703125, 'kl': 0.013682969845831394}, 'load_time_ms': 0.672, 'num_steps_sampled': 97200, 'update_time_ms': 2.634}",81,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.245383739471436,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,97200,97200,{},81,24,-99.26104626550475,2025-09-04_17-09-02,-8.733419482830186,3651948,1756998542,-93.1911542427198,3374.2282209396362,1972,49.15
+cda-server-2,False,3413.907021045685,"{'sample_time_ms': 39914.318, 'num_steps_trained': 98400, 'grad_time_ms': 368.136, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1271.07568359375, 'policy_loss': -0.11927060037851334, 'vf_explained_var': -0.7267799973487854, 'entropy': 14.841487884521484, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1270.9776611328125, 'kl': 0.013944336213171482}, 'load_time_ms': 0.672, 'num_steps_sampled': 98400, 'update_time_ms': 2.608}",82,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.678800106048584,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,98400,98400,{},82,24,-99.26104626550475,2025-09-04_17-09-41,-8.733419482830186,3651948,1756998581,-93.12007318425577,3413.907021045685,1996,49.15
+cda-server-2,False,3453.756364107132,"{'sample_time_ms': 39943.947, 'num_steps_trained': 99600, 'grad_time_ms': 366.691, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1233.8282470703125, 'policy_loss': -0.12820306420326233, 'vf_explained_var': -0.7392103672027588, 'entropy': 14.829949378967285, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1233.7203369140625, 'kl': 0.013510401360690594}, 'load_time_ms': 0.678, 'num_steps_sampled': 99600, 'update_time_ms': 2.609}",83,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.849343061447144,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,99600,99600,{},83,25,-99.26104626550475,2025-09-04_17-10-21,-8.733419482830186,3651948,1756998621,-92.26810527420496,3453.756364107132,2021,48.8
+cda-server-2,False,3493.5548133850098,"{'sample_time_ms': 39839.005, 'num_steps_trained': 100800, 'grad_time_ms': 366.936, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1215.7237548828125, 'policy_loss': -0.11922930181026459, 'vf_explained_var': -0.7129600048065186, 'entropy': 14.783453941345215, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1215.6236572265625, 'kl': 0.012615455314517021}, 'load_time_ms': 0.678, 'num_steps_sampled': 100800, 'update_time_ms': 2.588}",84,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.79844927787781,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,100800,100800,{},84,24,-99.26104626550475,2025-09-04_17-11-01,-13.917609120055879,3651948,1756998661,-92.80197099116748,3493.5548133850098,2045,49.17
+cda-server-2,False,3533.712546348572,"{'sample_time_ms': 39842.159, 'num_steps_trained': 102000, 'grad_time_ms': 367.639, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1278.0333251953125, 'policy_loss': -0.1087045967578888, 'vf_explained_var': -0.7911555767059326, 'entropy': 14.793651580810547, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1277.946044921875, 'kl': 0.014129284769296646}, 'load_time_ms': 0.672, 'num_steps_sampled': 102000, 'update_time_ms': 2.585}",85,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.15773296356201,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,102000,102000,{},85,25,-98.52128822180254,2025-09-04_17-11-41,-13.917609120055879,3651948,1756998701,-92.42686715994115,3533.712546348572,2070,48.98
+cda-server-2,False,3573.3178622722626,"{'sample_time_ms': 39791.294, 'num_steps_trained': 103200, 'grad_time_ms': 367.951, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1206.2105712890625, 'policy_loss': -0.11693794280290604, 'vf_explained_var': -0.7791456580162048, 'entropy': 14.77348518371582, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1206.1171875, 'kl': 0.015465127304196358}, 'load_time_ms': 0.669, 'num_steps_sampled': 103200, 'update_time_ms': 2.611}",86,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.605315923690796,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,103200,103200,{},86,25,-98.52128822180254,2025-09-04_17-12-21,-13.917609120055879,3651948,1756998741,-91.01069362105486,3573.3178622722626,2095,48.48
+cda-server-2,False,3613.9034507274628,"{'sample_time_ms': 39835.727, 'num_steps_trained': 104400, 'grad_time_ms': 368.041, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1148.5584716796875, 'policy_loss': -0.12584802508354187, 'vf_explained_var': -0.7068888545036316, 'entropy': 14.55543327331543, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1148.4549560546875, 'kl': 0.014751442708075047}, 'load_time_ms': 0.67, 'num_steps_sampled': 104400, 'update_time_ms': 2.583}",87,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.585588455200195,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,104400,104400,{},87,25,-98.72888046726543,2025-09-04_17-13-02,-25.99462355474143,3651948,1756998782,-90.95479067292243,3613.9034507274628,2120,48.46
+cda-server-2,False,3654.366242647171,"{'sample_time_ms': 39810.132, 'num_steps_trained': 105600, 'grad_time_ms': 368.977, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1196.8033447265625, 'policy_loss': -0.12734419107437134, 'vf_explained_var': -0.647044837474823, 'entropy': 14.450883865356445, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1196.6951904296875, 'kl': 0.012654243037104607}, 'load_time_ms': 0.676, 'num_steps_sampled': 105600, 'update_time_ms': 2.584}",88,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.46279191970825,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,105600,105600,{},88,24,-98.77030807707597,2025-09-04_17-13-42,-5.722962107342848,3651948,1756998822,-90.80268153204094,3654.366242647171,2144,48.24
+cda-server-2,False,3694.164056777954,"{'sample_time_ms': 39671.88, 'num_steps_trained': 106800, 'grad_time_ms': 368.472, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1234.7669677734375, 'policy_loss': -0.12505359947681427, 'vf_explained_var': -0.7778708338737488, 'entropy': 14.69221305847168, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1234.66162109375, 'kl': 0.012969114817678928}, 'load_time_ms': 0.685, 'num_steps_sampled': 106800, 'update_time_ms': 2.551}",89,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.79781413078308,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,106800,106800,{},89,26,-99.54022066114896,2025-09-04_17-14-22,-5.722962107342848,3651948,1756998862,-89.86875535557246,3694.164056777954,2170,47.89
+cda-server-2,False,3733.7502102851868,"{'sample_time_ms': 39599.99, 'num_steps_trained': 108000, 'grad_time_ms': 369.036, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1285.6556396484375, 'policy_loss': -0.12674559652805328, 'vf_explained_var': -0.8289951682090759, 'entropy': 14.869439125061035, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1285.5496826171875, 'kl': 0.01376924104988575}, 'load_time_ms': 0.685, 'num_steps_sampled': 108000, 'update_time_ms': 2.537}",90,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.586153507232666,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,108000,108000,{},90,24,-99.54022066114896,2025-09-04_17-15-01,-5.722962107342848,3651948,1756998901,-91.13390358708877,3733.7502102851868,2194,48.39
+cda-server-2,False,3774.2825310230255,"{'sample_time_ms': 39628.094, 'num_steps_trained': 109200, 'grad_time_ms': 369.618, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1224.0098876953125, 'policy_loss': -0.10955886542797089, 'vf_explained_var': -0.7773178815841675, 'entropy': 14.64888858795166, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1223.9169921875, 'kl': 0.011099190451204777}, 'load_time_ms': 0.702, 'num_steps_sampled': 109200, 'update_time_ms': 2.519}",91,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.532320737838745,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,109200,109200,{},91,25,-99.54022066114896,2025-09-04_17-15-42,-5.722962107342848,3651948,1756998942,-91.20847519918264,3774.2825310230255,2219,48.4
+cda-server-2,False,3814.0616085529327,"{'sample_time_ms': 39634.879, 'num_steps_trained': 110400, 'grad_time_ms': 372.838, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1189.8040771484375, 'policy_loss': -0.12425579130649567, 'vf_explained_var': -0.7224305272102356, 'entropy': 14.687872886657715, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1189.700927734375, 'kl': 0.013974593952298164}, 'load_time_ms': 0.705, 'num_steps_sampled': 110400, 'update_time_ms': 2.504}",92,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.77907752990723,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,110400,110400,{},92,26,-99.54022066114896,2025-09-04_17-16-22,1.4474787914261587,3651948,1756998982,-89.48766277537257,3814.0616085529327,2245,47.8
+cda-server-2,False,3854.6090116500854,"{'sample_time_ms': 39702.751, 'num_steps_trained': 111600, 'grad_time_ms': 374.732, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1208.7021484375, 'policy_loss': -0.11913042515516281, 'vf_explained_var': -0.6939985752105713, 'entropy': 14.28215503692627, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1208.60400390625, 'kl': 0.013752754777669907}, 'load_time_ms': 0.7, 'num_steps_sampled': 111600, 'update_time_ms': 2.531}",93,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.54740309715271,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,111600,111600,{},93,24,-99.36198879809118,2025-09-04_17-17-02,1.4474787914261587,3651948,1756999022,-90.58949901698992,3854.6090116500854,2269,48.22
+cda-server-2,False,3894.819942712784,"{'sample_time_ms': 39744.693, 'num_steps_trained': 112800, 'grad_time_ms': 374.001, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1184.99560546875, 'policy_loss': -0.12463506311178207, 'vf_explained_var': -0.643320620059967, 'entropy': 14.60263442993164, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1184.8944091796875, 'kl': 0.015387635678052902}, 'load_time_ms': 0.691, 'num_steps_sampled': 112800, 'update_time_ms': 2.53}",94,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.210931062698364,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,112800,112800,{},94,25,-99.36198879809118,2025-09-04_17-17-43,1.4474787914261587,3651948,1756999063,-89.95456546353162,3894.819942712784,2294,48.04
+cda-server-2,False,3934.89609003067,"{'sample_time_ms': 39737.281, 'num_steps_trained': 114000, 'grad_time_ms': 373.233, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1267.3551025390625, 'policy_loss': -0.12714111804962158, 'vf_explained_var': -0.7729015350341797, 'entropy': 14.54859447479248, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1267.2496337890625, 'kl': 0.014290733262896538}, 'load_time_ms': 0.689, 'num_steps_sampled': 114000, 'update_time_ms': 2.541}",95,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.07614731788635,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,114000,114000,{},95,25,-99.36198879809118,2025-09-04_17-18-23,1.4474787914261587,3651948,1756999103,-89.21487080403091,3934.89609003067,2319,47.81
+cda-server-2,False,3974.9890925884247,"{'sample_time_ms': 39785.839, 'num_steps_trained': 115200, 'grad_time_ms': 373.399, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1199.9796142578125, 'policy_loss': -0.12493264675140381, 'vf_explained_var': -0.7098046541213989, 'entropy': 14.439332962036133, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1199.87451171875, 'kl': 0.013081979006528854}, 'load_time_ms': 0.695, 'num_steps_sampled': 115200, 'update_time_ms': 2.574}",96,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.09300255775452,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,115200,115200,{},96,25,-99.36198879809118,2025-09-04_17-19-03,-28.275172311855314,3651948,1756999143,-90.65294534009193,3974.9890925884247,2344,48.51
+cda-server-2,False,4014.6931591033936,"{'sample_time_ms': 39698.153, 'num_steps_trained': 116400, 'grad_time_ms': 373.004, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1210.350341796875, 'policy_loss': -0.11625361442565918, 'vf_explained_var': -0.7325482368469238, 'entropy': 14.540125846862793, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1210.2540283203125, 'kl': 0.013076062314212322}, 'load_time_ms': 0.68, 'num_steps_sampled': 116400, 'update_time_ms': 2.558}",97,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.70406651496887,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,116400,116400,{},97,24,-97.95559873759191,2025-09-04_17-19-43,-35.95092867650534,3651948,1756999183,-91.17967484303084,4014.6931591033936,2368,48.85
+cda-server-2,False,4055.768358230591,"{'sample_time_ms': 39759.339, 'num_steps_trained': 117600, 'grad_time_ms': 373.009, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1190.3453369140625, 'policy_loss': -0.11981771886348724, 'vf_explained_var': -0.6935294270515442, 'entropy': 14.405661582946777, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1190.245849609375, 'kl': 0.013433661311864853}, 'load_time_ms': 0.685, 'num_steps_sampled': 117600, 'update_time_ms': 2.552}",98,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.075199127197266,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,117600,117600,{},98,25,-97.95559873759191,2025-09-04_17-20-24,0.5107333925751831,3651948,1756999224,-89.58005055925892,4055.768358230591,2393,48.15
+cda-server-2,False,4095.5292184352875,"{'sample_time_ms': 39757.793, 'num_steps_trained': 118800, 'grad_time_ms': 370.868, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1296.869140625, 'policy_loss': -0.1273031383752823, 'vf_explained_var': -0.7066032886505127, 'entropy': 14.593509674072266, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1296.7630615234375, 'kl': 0.013859516941010952}, 'load_time_ms': 0.679, 'num_steps_sampled': 118800, 'update_time_ms': 2.572}",99,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.760860204696655,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,118800,118800,{},99,24,-97.95559873759191,2025-09-04_17-21-03,0.5107333925751831,3651948,1756999263,-90.90190257334996,4095.5292184352875,2417,48.75
+cda-server-2,False,4135.280607700348,"{'sample_time_ms': 39771.754, 'num_steps_trained': 120000, 'grad_time_ms': 373.397, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1311.7376708984375, 'policy_loss': -0.13152579963207245, 'vf_explained_var': -0.6952612996101379, 'entropy': 14.473356246948242, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1311.62744140625, 'kl': 0.014029532670974731}, 'load_time_ms': 0.689, 'num_steps_sampled': 120000, 'update_time_ms': 2.588}",100,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.751389265060425,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,120000,120000,{},100,25,-98.5273936104996,2025-09-04_17-21-43,0.5107333925751831,3651948,1756999303,-90.455652480023,4135.280607700348,2442,48.5
+cda-server-2,False,4175.344631195068,"{'sample_time_ms': 39722.412, 'num_steps_trained': 121200, 'grad_time_ms': 375.911, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1326.9588623046875, 'policy_loss': -0.14080630242824554, 'vf_explained_var': -0.7763766050338745, 'entropy': 14.496570587158203, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1326.8421630859375, 'kl': 0.015901949256658554}, 'load_time_ms': 0.675, 'num_steps_sampled': 121200, 'update_time_ms': 2.616}",101,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.06402349472046,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,121200,121200,{},101,26,-98.5273936104996,2025-09-04_17-22-23,6.000664555683574,3651948,1756999343,-88.72845558193077,4175.344631195068,2468,47.76
+cda-server-2,False,4215.645416736603,"{'sample_time_ms': 39774.876, 'num_steps_trained': 122400, 'grad_time_ms': 375.573, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1194.6558837890625, 'policy_loss': -0.1253173053264618, 'vf_explained_var': -0.7658072710037231, 'entropy': 14.514446258544922, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1194.5496826171875, 'kl': 0.012578372843563557}, 'load_time_ms': 0.679, 'num_steps_sampled': 122400, 'update_time_ms': 2.664}",102,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.300785541534424,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,122400,122400,{},102,25,-98.5273936104996,2025-09-04_17-23-04,6.000664555683574,3651948,1756999384,-89.66162941043045,4215.645416736603,2493,48.25
+cda-server-2,False,4255.7618935108185,"{'sample_time_ms': 39733.237, 'num_steps_trained': 123600, 'grad_time_ms': 374.02, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1156.4312744140625, 'policy_loss': -0.1374259740114212, 'vf_explained_var': -0.6683142781257629, 'entropy': 14.21220874786377, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1156.3155517578125, 'kl': 0.014314512722194195}, 'load_time_ms': 0.678, 'num_steps_sampled': 123600, 'update_time_ms': 2.677}",103,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.1164767742157,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,123600,123600,{},103,25,-98.5273936104996,2025-09-04_17-23-44,6.000664555683574,3651948,1756999424,-88.23683398251121,4255.7618935108185,2518,47.59
+cda-server-2,False,4297.668802499771,"{'sample_time_ms': 39901.639, 'num_steps_trained': 124800, 'grad_time_ms': 375.174, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1237.98388671875, 'policy_loss': -0.12278148531913757, 'vf_explained_var': -0.6365931630134583, 'entropy': 14.33677864074707, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1237.88037109375, 'kl': 0.012673246674239635}, 'load_time_ms': 0.697, 'num_steps_sampled': 124800, 'update_time_ms': 2.728}",104,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.90690898895264,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,124800,124800,{},104,28,-97.46309625411676,2025-09-04_17-24-26,6.000664555683574,3651948,1756999466,-84.16640483824541,4297.668802499771,2546,45.78
+cda-server-2,False,4337.651317119598,"{'sample_time_ms': 39889.857, 'num_steps_trained': 126000, 'grad_time_ms': 377.575, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1278.95947265625, 'policy_loss': -0.12983882427215576, 'vf_explained_var': -0.6369035840034485, 'entropy': 14.396255493164062, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1278.85009765625, 'kl': 0.013471391052007675}, 'load_time_ms': 0.715, 'num_steps_sampled': 126000, 'update_time_ms': 2.716}",105,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.98251461982727,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,126000,126000,{},105,25,-97.45426535558042,2025-09-04_17-25-06,6.000003544694097,3651948,1756999506,-85.2422938899056,4337.651317119598,2571,46.3
+cda-server-2,False,4378.0724902153015,"{'sample_time_ms': 39924.688, 'num_steps_trained': 127200, 'grad_time_ms': 375.586, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1178.966552734375, 'policy_loss': -0.1314598172903061, 'vf_explained_var': -0.6940706968307495, 'entropy': 14.413055419921875, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1178.8572998046875, 'kl': 0.014483694918453693}, 'load_time_ms': 0.708, 'num_steps_sampled': 127200, 'update_time_ms': 2.682}",106,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.421173095703125,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,127200,127200,{},106,28,-97.15961541343583,2025-09-04_17-25-46,6.000003544694097,3651948,1756999546,-82.70880042113075,4378.0724902153015,2599,45.08
+cda-server-2,False,4419.78364610672,"{'sample_time_ms': 40123.747, 'num_steps_trained': 128400, 'grad_time_ms': 377.182, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1233.5357666015625, 'policy_loss': -0.12721286714076996, 'vf_explained_var': -0.7277848720550537, 'entropy': 14.43802261352539, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1233.4302978515625, 'kl': 0.014336716383695602}, 'load_time_ms': 0.729, 'num_steps_sampled': 128400, 'update_time_ms': 2.697}",107,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.71115589141846,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,128400,128400,{},107,25,-98.2978558218741,2025-09-04_17-26-28,2.1440509234017577,3651948,1756999588,-84.86409343511512,4419.78364610672,2624,46.06
+cda-server-2,False,4460.092601776123,"{'sample_time_ms': 40046.852, 'num_steps_trained': 129600, 'grad_time_ms': 377.472, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1257.62548828125, 'policy_loss': -0.12686073780059814, 'vf_explained_var': -0.6250575184822083, 'entropy': 14.22714614868164, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1257.5218505859375, 'kl': 0.015301553532481194}, 'load_time_ms': 0.724, 'num_steps_sampled': 129600, 'update_time_ms': 2.693}",108,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.308955669403076,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,129600,129600,{},108,25,-98.2978558218741,2025-09-04_17-27-08,3.0193488702176747,3651948,1756999628,-86.37986970879447,4460.092601776123,2649,46.67
+cda-server-2,False,4500.163435935974,"{'sample_time_ms': 40077.132, 'num_steps_trained': 130800, 'grad_time_ms': 378.158, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1223.0604248046875, 'policy_loss': -0.13701944053173065, 'vf_explained_var': -0.6192005276679993, 'entropy': 14.382627487182617, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1222.9482421875, 'kl': 0.016335275024175644}, 'load_time_ms': 0.723, 'num_steps_sampled': 130800, 'update_time_ms': 2.697}",109,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.070834159851074,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,130800,130800,{},109,26,-98.2978558218741,2025-09-04_17-27-48,3.0193488702176747,3651948,1756999668,-86.45667733670398,4500.163435935974,2675,46.77
+cda-server-2,False,4541.299084186554,"{'sample_time_ms': 40215.232, 'num_steps_trained': 132000, 'grad_time_ms': 378.476, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1271.205322265625, 'policy_loss': -0.13974149525165558, 'vf_explained_var': -0.5736344456672668, 'entropy': 14.173750877380371, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1271.0859375, 'kl': 0.01340469066053629}, 'load_time_ms': 0.725, 'num_steps_sampled': 132000, 'update_time_ms': 2.696}",110,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.135648250579834,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,132000,132000,{},110,27,-99.64931377321552,2025-09-04_17-28-29,3.0193488702176747,3651948,1756999709,-85.65938928896858,4541.299084186554,2702,46.35
+cda-server-2,False,4581.27139878273,"{'sample_time_ms': 40206.484, 'num_steps_trained': 133200, 'grad_time_ms': 378.024, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1239.5079345703125, 'policy_loss': -0.12589646875858307, 'vf_explained_var': -0.6094751954078674, 'entropy': 14.06795883178711, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1239.40234375, 'kl': 0.013416077941656113}, 'load_time_ms': 0.727, 'num_steps_sampled': 133200, 'update_time_ms': 2.708}",111,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.97231459617615,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,133200,133200,{},111,24,-99.64931377321552,2025-09-04_17-29-10,3.0193488702176747,3651948,1756999750,-85.355216547187,4581.27139878273,2726,46.29
+cda-server-2,False,4621.051965236664,"{'sample_time_ms': 40156.241, 'num_steps_trained': 134400, 'grad_time_ms': 376.258, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1339.635009765625, 'policy_loss': -0.1412985920906067, 'vf_explained_var': -0.592364490032196, 'entropy': 14.377864837646484, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1339.51513671875, 'kl': 0.014054707251489162}, 'load_time_ms': 0.718, 'num_steps_sampled': 134400, 'update_time_ms': 2.694}",112,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.780566453933716,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,134400,134400,{},112,25,-99.64931377321552,2025-09-04_17-29-49,-1.849715851617404,3651948,1756999789,-86.26141017081436,4621.051965236664,2751,46.8
+cda-server-2,False,4661.2936680316925,"{'sample_time_ms': 40167.92, 'num_steps_trained': 135600, 'grad_time_ms': 377.197, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1229.363037109375, 'policy_loss': -0.1360459327697754, 'vf_explained_var': -0.5663503408432007, 'entropy': 14.140381813049316, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1229.2481689453125, 'kl': 0.013758447952568531}, 'load_time_ms': 0.727, 'num_steps_sampled': 135600, 'update_time_ms': 2.661}",113,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.24170279502869,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,135600,135600,{},113,28,-99.64931377321552,2025-09-04_17-30-30,-1.849715851617404,3651948,1756999830,-84.13588032368352,4661.2936680316925,2779,45.91
+cda-server-2,False,4701.200223684311,"{'sample_time_ms': 39968.296, 'num_steps_trained': 136800, 'grad_time_ms': 376.908, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1215.121337890625, 'policy_loss': -0.1374468207359314, 'vf_explained_var': -0.622351884841919, 'entropy': 14.042511940002441, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1215.00537109375, 'kl': 0.014193039387464523}, 'load_time_ms': 0.713, 'num_steps_sampled': 136800, 'update_time_ms': 2.603}",114,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.90655565261841,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,136800,136800,{},114,27,-97.28639060982673,2025-09-04_17-31-09,2.000004349898961,3651948,1756999869,-84.21676647970006,4701.200223684311,2806,46.08
+cda-server-2,False,4740.942209243774,"{'sample_time_ms': 39946.04, 'num_steps_trained': 138000, 'grad_time_ms': 375.175, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1228.0399169921875, 'policy_loss': -0.1427999883890152, 'vf_explained_var': -0.6482807993888855, 'entropy': 14.210469245910645, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1227.91748046875, 'kl': 0.013438764959573746}, 'load_time_ms': 0.719, 'num_steps_sampled': 138000, 'update_time_ms': 2.585}",115,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.7419855594635,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,138000,138000,{},115,25,-97.46278423272653,2025-09-04_17-31-49,2.000004349898961,3651948,1756999909,-84.02052648087194,4740.942209243774,2831,46.04
+cda-server-2,False,4780.744037866592,"{'sample_time_ms': 39880.803, 'num_steps_trained': 139200, 'grad_time_ms': 378.501, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1398.448974609375, 'policy_loss': -0.13323861360549927, 'vf_explained_var': -0.7571742534637451, 'entropy': 14.20920467376709, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1398.337646484375, 'kl': 0.014472413808107376}, 'load_time_ms': 0.736, 'num_steps_sampled': 139200, 'update_time_ms': 2.536}",116,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.80182862281799,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,139200,139200,{},116,24,-97.46278423272653,2025-09-04_17-32-29,2.000004349898961,3651948,1756999949,-84.00878538868815,4780.744037866592,2855,46.0
+cda-server-2,False,4820.50555896759,"{'sample_time_ms': 39688.761, 'num_steps_trained': 140400, 'grad_time_ms': 375.63, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1149.515380859375, 'policy_loss': -0.12735703587532043, 'vf_explained_var': -0.6808863878250122, 'entropy': 13.910858154296875, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1149.405517578125, 'kl': 0.01153584010899067}, 'load_time_ms': 0.717, 'num_steps_sampled': 140400, 'update_time_ms': 2.537}",117,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.761521100997925,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,140400,140400,{},117,27,-97.46278423272653,2025-09-04_17-33-09,-2.9198034618987947,3651948,1756999989,-86.9114394525108,4820.50555896759,2882,47.45
+cda-server-2,False,4860.424870014191,"{'sample_time_ms': 39652.325, 'num_steps_trained': 141600, 'grad_time_ms': 373.139, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1287.9305419921875, 'policy_loss': -0.13839703798294067, 'vf_explained_var': -0.6928651332855225, 'entropy': 14.062501907348633, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1287.81640625, 'kl': 0.01598420925438404}, 'load_time_ms': 0.713, 'num_steps_sampled': 141600, 'update_time_ms': 2.516}",118,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.91931104660034,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,141600,141600,{},118,26,-97.46278423272653,2025-09-04_17-33-49,-2.9198034618987947,3651948,1757000029,-86.97735539907637,4860.424870014191,2908,47.41
+cda-server-2,False,4901.108229875565,"{'sample_time_ms': 39714.083, 'num_steps_trained': 142800, 'grad_time_ms': 372.653, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1239.31298828125, 'policy_loss': -0.14445358514785767, 'vf_explained_var': -0.7411688566207886, 'entropy': 14.128597259521484, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1239.1910400390625, 'kl': 0.014916145242750645}, 'load_time_ms': 0.712, 'num_steps_sampled': 142800, 'update_time_ms': 2.522}",119,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.6833598613739,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,142800,142800,{},119,27,-97.12466194862068,2025-09-04_17-34-29,7.805300910529125,3651948,1757000069,-84.39691080163124,4901.108229875565,2935,46.3
+cda-server-2,False,4941.155441761017,"{'sample_time_ms': 39607.559, 'num_steps_trained': 144000, 'grad_time_ms': 370.35, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1206.25048828125, 'policy_loss': -0.12830425798892975, 'vf_explained_var': -0.652219831943512, 'entropy': 14.344528198242188, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1206.142333984375, 'kl': 0.01337106991559267}, 'load_time_ms': 0.712, 'num_steps_sampled': 144000, 'update_time_ms': 2.504}",120,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.04721188545227,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,144000,144000,{},120,26,-97.12466194862068,2025-09-04_17-35-09,7.805300910529125,3651948,1757000109,-82.81832333655902,4941.155441761017,2961,45.62
+cda-server-2,False,4981.022238731384,"{'sample_time_ms': 39599.494, 'num_steps_trained': 145200, 'grad_time_ms': 367.911, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1186.442138671875, 'policy_loss': -0.1330496370792389, 'vf_explained_var': -0.6136595606803894, 'entropy': 14.419291496276855, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1186.3292236328125, 'kl': 0.013351598754525185}, 'load_time_ms': 0.701, 'num_steps_sampled': 145200, 'update_time_ms': 2.481}",121,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.86679697036743,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,145200,145200,{},121,26,-97.12466194862068,2025-09-04_17-35-49,7.805300910529125,3651948,1757000149,-81.44347113403737,4981.022238731384,2987,45.06
+cda-server-2,False,5020.848915338516,"{'sample_time_ms': 39604.063, 'num_steps_trained': 146400, 'grad_time_ms': 367.975, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1180.305419921875, 'policy_loss': -0.13235728442668915, 'vf_explained_var': -0.553139865398407, 'entropy': 14.17344856262207, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1180.1942138671875, 'kl': 0.013966232538223267}, 'load_time_ms': 0.704, 'num_steps_sampled': 146400, 'update_time_ms': 2.469}",122,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.82667660713196,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,146400,146400,{},122,27,-96.53858566931319,2025-09-04_17-36-29,7.805300910529125,3651948,1757000189,-82.05372274573158,5020.848915338516,3014,45.31
+cda-server-2,False,5060.564336061478,"{'sample_time_ms': 39551.678, 'num_steps_trained': 147600, 'grad_time_ms': 367.708, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1200.6090087890625, 'policy_loss': -0.14686201512813568, 'vf_explained_var': -0.4810258448123932, 'entropy': 14.21179485321045, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1200.483154296875, 'kl': 0.01375659555196762}, 'load_time_ms': 0.693, 'num_steps_sampled': 147600, 'update_time_ms': 2.472}",123,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.715420722961426,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,147600,147600,{},123,26,-96.53858566931319,2025-09-04_17-37-09,-1.4890587415309486,3651948,1757000229,-83.17987228195717,5060.564336061478,3040,45.85
+cda-server-2,False,5100.991299629211,"{'sample_time_ms': 39605.376, 'num_steps_trained': 148800, 'grad_time_ms': 365.949, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1039.895751953125, 'policy_loss': -0.14113567769527435, 'vf_explained_var': -0.5060775876045227, 'entropy': 13.987238883972168, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1039.7767333984375, 'kl': 0.014459229074418545}, 'load_time_ms': 0.692, 'num_steps_sampled': 148800, 'update_time_ms': 2.55}",124,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.426963567733765,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,148800,148800,{},124,25,-96.53858566931319,2025-09-04_17-37-49,-1.4890587415309486,3651948,1757000269,-82.2096470112448,5100.991299629211,3065,45.5
+cda-server-2,False,5140.684392690659,"{'sample_time_ms': 39602.107, 'num_steps_trained': 150000, 'grad_time_ms': 364.369, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 1045.89404296875, 'policy_loss': -0.1301415115594864, 'vf_explained_var': -0.6093275547027588, 'entropy': 14.328471183776855, 'cur_lr': 4.999999873689376e-05, 'total_loss': 1045.78515625, 'kl': 0.013961934484541416}, 'load_time_ms': 0.67, 'num_steps_sampled': 150000, 'update_time_ms': 2.543}",125,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.693093061447144,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,150000,150000,{},125,26,-96.53858566931319,2025-09-04_17-38-29,-1.4890587415309486,3651948,1757000309,-83.63141672561046,5140.684392690659,3091,46.22
+cda-server-2,False,5180.632479429245,"{'sample_time_ms': 39617.571, 'num_steps_trained': 151200, 'grad_time_ms': 363.531, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 757.4061279296875, 'policy_loss': -0.13331261277198792, 'vf_explained_var': -0.2395211011171341, 'entropy': 14.12633991241455, 'cur_lr': 4.999999873689376e-05, 'total_loss': 757.2942504882812, 'kl': 0.014148239977657795}, 'load_time_ms': 0.659, 'num_steps_sampled': 151200, 'update_time_ms': 2.538}",126,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.948086738586426,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,151200,151200,{},126,26,-96.40172250854067,2025-09-04_17-39-09,-1.4890587415309486,3651948,1757000349,-83.57069733948921,5180.632479429245,3117,46.48
+cda-server-2,False,5220.544438838959,"{'sample_time_ms': 39629.25, 'num_steps_trained': 152400, 'grad_time_ms': 366.799, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 601.220458984375, 'policy_loss': -0.13246841728687286, 'vf_explained_var': -0.1447088122367859, 'entropy': 14.11307430267334, 'cur_lr': 4.999999873689376e-05, 'total_loss': 601.1065673828125, 'kl': 0.012203659862279892}, 'load_time_ms': 0.661, 'num_steps_sampled': 152400, 'update_time_ms': 2.599}",127,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.911959409713745,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,152400,152400,{},127,28,-96.33489906872681,2025-09-04_17-39-49,6.00000171303838,3651948,1757000389,-82.60059780921091,5220.544438838959,3145,46.14
+cda-server-2,False,5261.096935510635,"{'sample_time_ms': 39690.801, 'num_steps_trained': 153600, 'grad_time_ms': 368.482, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 506.0225830078125, 'policy_loss': -0.12658780813217163, 'vf_explained_var': -0.06294663995504379, 'entropy': 14.06998062133789, 'cur_lr': 4.999999873689376e-05, 'total_loss': 505.9153747558594, 'kl': 0.012763193808495998}, 'load_time_ms': 0.666, 'num_steps_sampled': 153600, 'update_time_ms': 2.658}",128,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.552496671676636,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,153600,153600,{},128,25,-96.33489906872681,2025-09-04_17-40-30,6.00000171303838,3651948,1757000430,-83.05137308127264,5261.096935510635,3170,46.31
+cda-server-2,False,5300.679358243942,"{'sample_time_ms': 39578.543, 'num_steps_trained': 154800, 'grad_time_ms': 370.609, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 431.7116394042969, 'policy_loss': -0.15474864840507507, 'vf_explained_var': -0.01031529251486063, 'entropy': 14.282392501831055, 'cur_lr': 4.999999873689376e-05, 'total_loss': 431.57843017578125, 'kl': 0.01416665967553854}, 'load_time_ms': 0.681, 'num_steps_sampled': 154800, 'update_time_ms': 2.658}",129,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.582422733306885,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,154800,154800,{},129,24,-95.68258263100707,2025-09-04_17-41-09,6.00000171303838,3651948,1757000469,-83.72035979343802,5300.679358243942,3194,46.6
+cda-server-2,False,5340.522296190262,"{'sample_time_ms': 39556.789, 'num_steps_trained': 156000, 'grad_time_ms': 371.925, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 437.48919677734375, 'policy_loss': -0.15160608291625977, 'vf_explained_var': 0.0015125274658203125, 'entropy': 14.29353141784668, 'cur_lr': 4.999999873689376e-05, 'total_loss': 437.3584899902344, 'kl': 0.0137290358543396}, 'load_time_ms': 0.674, 'num_steps_sampled': 156000, 'update_time_ms': 2.657}",130,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.84293794631958,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,156000,156000,{},130,25,-95.68258263100707,2025-09-04_17-41-49,6.00000171303838,3651948,1757000509,-84.13483640434106,5340.522296190262,3219,46.62
+cda-server-2,False,5380.822713375092,"{'sample_time_ms': 39598.8, 'num_steps_trained': 157200, 'grad_time_ms': 373.285, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 529.89306640625, 'policy_loss': -0.13991203904151917, 'vf_explained_var': 0.0049516428261995316, 'entropy': 13.942055702209473, 'cur_lr': 4.999999873689376e-05, 'total_loss': 529.7734375, 'kl': 0.013350359164178371}, 'load_time_ms': 0.672, 'num_steps_sampled': 157200, 'update_time_ms': 2.644}",131,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.30041718482971,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,157200,157200,{},131,26,-98.09153998826689,2025-09-04_17-42-30,0.0009420488181604014,3651948,1757000550,-85.1697245143577,5380.822713375092,3245,47.13
+cda-server-2,False,5421.157953977585,"{'sample_time_ms': 39650.571, 'num_steps_trained': 158400, 'grad_time_ms': 372.43, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 466.9972229003906, 'policy_loss': -0.16029267013072968, 'vf_explained_var': 0.00613213237375021, 'entropy': 13.89816665649414, 'cur_lr': 4.999999873689376e-05, 'total_loss': 466.85931396484375, 'kl': 0.014697965234518051}, 'load_time_ms': 0.67, 'num_steps_sampled': 158400, 'update_time_ms': 2.623}",132,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.335240602493286,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,158400,158400,{},132,28,-98.09153998826689,2025-09-04_17-43-10,0.0009420488181604014,3651948,1757000590,-83.4383318103255,5421.157953977585,3273,46.48
+cda-server-2,False,5461.025522947311,"{'sample_time_ms': 39664.748, 'num_steps_trained': 159600, 'grad_time_ms': 373.541, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 435.2093200683594, 'policy_loss': -0.14789816737174988, 'vf_explained_var': 0.018822822719812393, 'entropy': 13.995210647583008, 'cur_lr': 4.999999873689376e-05, 'total_loss': 435.0834045410156, 'kl': 0.014483905397355556}, 'load_time_ms': 0.672, 'num_steps_sampled': 159600, 'update_time_ms': 2.6}",133,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.86756896972656,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,159600,159600,{},133,27,-98.09153998826689,2025-09-04_17-43-50,0.0009420488181604014,3651948,1757000630,-80.48768962719447,5461.025522947311,3300,45.15
+cda-server-2,False,5502.38499712944,"{'sample_time_ms': 39756.354, 'num_steps_trained': 160800, 'grad_time_ms': 375.214, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 497.5989685058594, 'policy_loss': -0.1495353728532791, 'vf_explained_var': 0.03133540600538254, 'entropy': 14.031764030456543, 'cur_lr': 4.999999873689376e-05, 'total_loss': 497.4719543457031, 'kl': 0.014864559285342693}, 'load_time_ms': 0.676, 'num_steps_sampled': 160800, 'update_time_ms': 2.575}",134,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.359474182128906,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,160800,160800,{},134,28,-96.25872951173972,2025-09-04_17-44-31,-5.1776342299954425,3651948,1757000671,-77.20826072261913,5502.38499712944,3328,43.67
+cda-server-2,False,5542.784331083298,"{'sample_time_ms': 39823.63, 'num_steps_trained': 162000, 'grad_time_ms': 378.518, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 494.53619384765625, 'policy_loss': -0.15687929093837738, 'vf_explained_var': 0.006207088474184275, 'entropy': 13.837095260620117, 'cur_lr': 4.999999873689376e-05, 'total_loss': 494.3984375, 'kl': 0.012575294822454453}, 'load_time_ms': 0.688, 'num_steps_sampled': 162000, 'update_time_ms': 2.586}",135,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.39933395385742,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,162000,162000,{},135,30,-96.70286184158135,2025-09-04_17-45-12,8.000000400002254,3651948,1757000712,-75.40999040364656,5542.784331083298,3358,42.73
+cda-server-2,False,5582.566870212555,"{'sample_time_ms': 39807.303, 'num_steps_trained': 163200, 'grad_time_ms': 378.286, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 429.1787109375, 'policy_loss': -0.13850900530815125, 'vf_explained_var': 0.004715243820101023, 'entropy': 13.751395225524902, 'cur_lr': 4.999999873689376e-05, 'total_loss': 429.0587463378906, 'kl': 0.01221616193652153}, 'load_time_ms': 0.693, 'num_steps_sampled': 163200, 'update_time_ms': 2.603}",136,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.7825391292572,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,163200,163200,{},136,24,-96.70286184158135,2025-09-04_17-45-51,8.000000400002254,3651948,1757000751,-77.38409159796784,5582.566870212555,3382,43.6
+cda-server-2,False,5622.550188064575,"{'sample_time_ms': 39815.366, 'num_steps_trained': 164400, 'grad_time_ms': 377.402, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 519.8641357421875, 'policy_loss': -0.15484751760959625, 'vf_explained_var': 9.56919466261752e-05, 'entropy': 14.056158065795898, 'cur_lr': 4.999999873689376e-05, 'total_loss': 519.7298583984375, 'kl': 0.013529930263757706}, 'load_time_ms': 0.695, 'num_steps_sampled': 164400, 'update_time_ms': 2.568}",137,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.983317852020264,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,164400,164400,{},137,25,-96.70286184158135,2025-09-04_17-46-31,8.000000400002254,3651948,1757000791,-78.37530479396784,5622.550188064575,3407,44.37
+cda-server-2,False,5662.401572704315,"{'sample_time_ms': 39744.196, 'num_steps_trained': 165600, 'grad_time_ms': 378.524, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 511.96624755859375, 'policy_loss': -0.14721018075942993, 'vf_explained_var': 0.022053804248571396, 'entropy': 13.871037483215332, 'cur_lr': 4.999999873689376e-05, 'total_loss': 511.83843994140625, 'kl': 0.012805236503481865}, 'load_time_ms': 0.686, 'num_steps_sampled': 165600, 'update_time_ms': 2.52}",138,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.85138463973999,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,165600,165600,{},138,29,-96.70286184158135,2025-09-04_17-47-11,6.000004628464221,3651948,1757000831,-80.62368275993127,5662.401572704315,3436,45.52
+cda-server-2,False,5702.67907166481,"{'sample_time_ms': 39815.884, 'num_steps_trained': 166800, 'grad_time_ms': 376.383, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 462.9251403808594, 'policy_loss': -0.14543704688549042, 'vf_explained_var': 0.013895895332098007, 'entropy': 13.724479675292969, 'cur_lr': 4.999999873689376e-05, 'total_loss': 462.7996826171875, 'kl': 0.013188743032515049}, 'load_time_ms': 0.678, 'num_steps_sampled': 166800, 'update_time_ms': 2.513}",139,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.277498960494995,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,166800,166800,{},139,26,-96.3706927890899,2025-09-04_17-47-51,6.000004628464221,3651948,1757000871,-82.57511411226803,5702.67907166481,3462,46.38
+cda-server-2,False,5742.838150262833,"{'sample_time_ms': 39847.8, 'num_steps_trained': 168000, 'grad_time_ms': 376.118, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 468.1904296875, 'policy_loss': -0.12903155386447906, 'vf_explained_var': 0.010596592910587788, 'entropy': 13.66910457611084, 'cur_lr': 4.999999873689376e-05, 'total_loss': 468.0818786621094, 'kl': 0.013512490317225456}, 'load_time_ms': 0.673, 'num_steps_sampled': 168000, 'update_time_ms': 2.509}",140,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.15907859802246,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,168000,168000,{},140,25,-96.3706927890899,2025-09-04_17-48-32,4.00041902346528,3651948,1757000912,-81.06491087767878,5742.838150262833,3487,45.78
+cda-server-2,False,5783.118670940399,"{'sample_time_ms': 39845.818, 'num_steps_trained': 169200, 'grad_time_ms': 376.048, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 519.5208129882812, 'policy_loss': -0.14394643902778625, 'vf_explained_var': 0.007588174659758806, 'entropy': 13.70052433013916, 'cur_lr': 4.999999873689376e-05, 'total_loss': 519.397705078125, 'kl': 0.01370695885270834}, 'load_time_ms': 0.678, 'num_steps_sampled': 169200, 'update_time_ms': 2.517}",141,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.28052067756653,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,169200,169200,{},141,30,-94.9174978999614,2025-09-04_17-49-12,4.00041902346528,3651948,1757000952,-78.30766103408874,5783.118670940399,3517,44.28
+cda-server-2,False,5823.950105428696,"{'sample_time_ms': 39894.033, 'num_steps_trained': 170400, 'grad_time_ms': 377.402, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 533.2041015625, 'policy_loss': -0.15218709409236908, 'vf_explained_var': 0.027479078620672226, 'entropy': 14.154834747314453, 'cur_lr': 4.999999873689376e-05, 'total_loss': 533.073486328125, 'kl': 0.014139831066131592}, 'load_time_ms': 0.687, 'num_steps_sampled': 170400, 'update_time_ms': 2.548}",142,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.83143448829651,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,170400,170400,{},142,28,-95.80786370762291,2025-09-04_17-49-53,2.000266023377246,3651948,1757000993,-75.77111625323947,5823.950105428696,3545,43.22
+cda-server-2,False,5864.083309173584,"{'sample_time_ms': 39920.658, 'num_steps_trained': 171600, 'grad_time_ms': 377.249, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 447.897216796875, 'policy_loss': -0.14685262739658356, 'vf_explained_var': 0.012203852646052837, 'entropy': 13.77534008026123, 'cur_lr': 4.999999873689376e-05, 'total_loss': 447.7716979980469, 'kl': 0.014064337126910686}, 'load_time_ms': 0.698, 'num_steps_sampled': 171600, 'update_time_ms': 2.559}",143,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.133203744888306,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,171600,171600,{},143,28,-95.80786370762291,2025-09-04_17-50-33,2.000266023377246,3651948,1757001033,-74.91504953685117,5864.083309173584,3573,42.82
+cda-server-2,False,5904.289658069611,"{'sample_time_ms': 39806.058, 'num_steps_trained': 172800, 'grad_time_ms': 376.511, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 439.82000732421875, 'policy_loss': -0.15573416650295258, 'vf_explained_var': 0.010575804859399796, 'entropy': 13.51430892944336, 'cur_lr': 4.999999873689376e-05, 'total_loss': 439.6846008300781, 'kl': 0.013372303918004036}, 'load_time_ms': 0.689, 'num_steps_sampled': 172800, 'update_time_ms': 2.545}",144,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.20634889602661,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,172800,172800,{},144,26,-96.95658790248578,2025-09-04_17-51-13,2.000266023377246,3651948,1757001073,-77.47728638204703,5904.289658069611,3599,43.86
+cda-server-2,False,5944.542104482651,"{'sample_time_ms': 39793.73, 'num_steps_trained': 174000, 'grad_time_ms': 374.167, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 489.4002685546875, 'policy_loss': -0.15363189578056335, 'vf_explained_var': 0.010992010124027729, 'entropy': 13.861942291259766, 'cur_lr': 4.999999873689376e-05, 'total_loss': 489.2677307128906, 'kl': 0.013880307786166668}, 'load_time_ms': 0.683, 'num_steps_sampled': 174000, 'update_time_ms': 2.551}",145,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.25244641304016,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,174000,174000,{},145,30,-96.95658790248578,2025-09-04_17-51-53,6.00050672631794,3651948,1757001113,-77.48228222365948,5944.542104482651,3629,43.74
+cda-server-2,False,5984.474422693253,"{'sample_time_ms': 39808.244, 'num_steps_trained': 175200, 'grad_time_ms': 374.642, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 483.8934326171875, 'policy_loss': -0.16133642196655273, 'vf_explained_var': 0.004873269237577915, 'entropy': 13.626518249511719, 'cur_lr': 4.999999873689376e-05, 'total_loss': 483.7530517578125, 'kl': 0.013784998096525669}, 'load_time_ms': 0.681, 'num_steps_sampled': 175200, 'update_time_ms': 2.542}",146,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.93231821060181,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,175200,175200,{},146,25,-96.95658790248578,2025-09-04_17-52-33,6.00050672631794,3651948,1757001153,-77.00879523596886,5984.474422693253,3654,43.39
+cda-server-2,False,6024.323261499405,"{'sample_time_ms': 39797.488, 'num_steps_trained': 176400, 'grad_time_ms': 371.987, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 489.1770324707031, 'policy_loss': -0.1577053815126419, 'vf_explained_var': 0.02425098419189453, 'entropy': 13.7898530960083, 'cur_lr': 4.999999873689376e-05, 'total_loss': 489.03948974609375, 'kl': 0.013261471875011921}, 'load_time_ms': 0.676, 'num_steps_sampled': 176400, 'update_time_ms': 2.547}",147,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.848838806152344,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,176400,176400,{},147,28,-96.95658790248578,2025-09-04_17-53-13,6.00050672631794,3651948,1757001193,-76.72782283086792,6024.323261499405,3682,43.29
+cda-server-2,False,6064.125794410706,"{'sample_time_ms': 39792.766, 'num_steps_trained': 177600, 'grad_time_ms': 371.849, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 526.368408203125, 'policy_loss': -0.14639145135879517, 'vf_explained_var': 0.020639657974243164, 'entropy': 13.701428413391113, 'cur_lr': 4.999999873689376e-05, 'total_loss': 526.2421875, 'kl': 0.013283911161124706}, 'load_time_ms': 0.686, 'num_steps_sampled': 177600, 'update_time_ms': 2.554}",148,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.80253291130066,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,177600,177600,{},148,27,-96.31788008641165,2025-09-04_17-53-53,6.00050672631794,3651948,1757001233,-77.02193331284515,6064.125794410706,3709,43.53
+cda-server-2,False,6104.521768093109,"{'sample_time_ms': 39804.917, 'num_steps_trained': 178800, 'grad_time_ms': 371.567, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 534.6456298828125, 'policy_loss': -0.14344368875026703, 'vf_explained_var': 0.032623257488012314, 'entropy': 13.884628295898438, 'cur_lr': 4.999999873689376e-05, 'total_loss': 534.522705078125, 'kl': 0.013491793535649776}, 'load_time_ms': 0.68, 'num_steps_sampled': 178800, 'update_time_ms': 2.547}",149,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.395973682403564,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,178800,178800,{},149,27,-96.31788008641165,2025-09-04_17-54-34,4.000905065352485,3651948,1757001274,-77.59475084319799,6104.521768093109,3736,44.08
+cda-server-2,False,6144.585580587387,"{'sample_time_ms': 39797.247, 'num_steps_trained': 180000, 'grad_time_ms': 369.718, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 589.8328857421875, 'policy_loss': -0.15737102925777435, 'vf_explained_var': 0.0029666093178093433, 'entropy': 13.93885326385498, 'cur_lr': 4.999999873689376e-05, 'total_loss': 589.6962280273438, 'kl': 0.013605907559394836}, 'load_time_ms': 0.682, 'num_steps_sampled': 180000, 'update_time_ms': 2.553}",150,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.063812494277954,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,180000,180000,{},150,28,-96.31788008641165,2025-09-04_17-55-14,4.000905065352485,3651948,1757001314,-75.6539729494931,6144.585580587387,3764,43.33
+cda-server-2,False,6185.331671714783,"{'sample_time_ms': 39844.811, 'num_steps_trained': 181200, 'grad_time_ms': 368.735, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 573.21142578125, 'policy_loss': -0.1537049263715744, 'vf_explained_var': 0.009622778743505478, 'entropy': 13.354726791381836, 'cur_lr': 4.999999873689376e-05, 'total_loss': 573.0790405273438, 'kl': 0.01405271515250206}, 'load_time_ms': 0.673, 'num_steps_sampled': 181200, 'update_time_ms': 2.562}",151,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.74609112739563,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,181200,181200,{},151,33,-96.06327662108316,2025-09-04_17-55-54,6.000005684032507,3651948,1757001354,-73.6287210840856,6185.331671714783,3797,42.29
+cda-server-2,False,6225.614793539047,"{'sample_time_ms': 39791.345, 'num_steps_trained': 182400, 'grad_time_ms': 367.327, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 563.124755859375, 'policy_loss': -0.14891427755355835, 'vf_explained_var': 0.00879173818975687, 'entropy': 13.944217681884766, 'cur_lr': 4.999999873689376e-05, 'total_loss': 562.9971923828125, 'kl': 0.014039833098649979}, 'load_time_ms': 0.658, 'num_steps_sampled': 182400, 'update_time_ms': 2.626}",152,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.283121824264526,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,182400,182400,{},152,29,-96.06327662108316,2025-09-04_17-56-35,6.000005684032507,3651948,1757001395,-69.42951983223737,6225.614793539047,3826,40.17
+cda-server-2,False,6263.704438686371,"{'sample_time_ms': 39589.201, 'num_steps_trained': 183600, 'grad_time_ms': 365.196, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 451.091796875, 'policy_loss': -0.16401776671409607, 'vf_explained_var': 0.008112185634672642, 'entropy': 13.623714447021484, 'cur_lr': 4.999999873689376e-05, 'total_loss': 450.9493103027344, 'kl': 0.014164643362164497}, 'load_time_ms': 0.653, 'num_steps_sampled': 183600, 'update_time_ms': 2.607}",153,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",38.08964514732361,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,183600,183600,{},153,27,-96.06327662108316,2025-09-04_17-57-13,6.000005684032507,3651948,1757001433,-72.39619258771732,6263.704438686371,3853,41.43
+cda-server-2,False,6297.63969874382,"{'sample_time_ms': 38960.78, 'num_steps_trained': 184800, 'grad_time_ms': 366.522, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 426.76007080078125, 'policy_loss': -0.1499679982662201, 'vf_explained_var': 0.019961846992373466, 'entropy': 14.01634407043457, 'cur_lr': 4.999999873689376e-05, 'total_loss': 426.6302185058594, 'kl': 0.01324660424143076}, 'load_time_ms': 0.675, 'num_steps_sampled': 184800, 'update_time_ms': 2.571}",154,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.93526005744934,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,184800,184800,{},154,27,-96.02294955272997,2025-09-04_17-57-47,6.000005684032507,3651948,1757001467,-73.46839023207444,6297.63969874382,3880,42.25
+cda-server-2,False,6330.8898866176605,"{'sample_time_ms': 38258.875, 'num_steps_trained': 186000, 'grad_time_ms': 368.158, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 463.51904296875, 'policy_loss': -0.14390847086906433, 'vf_explained_var': 0.0144983334466815, 'entropy': 13.602646827697754, 'cur_lr': 4.999999873689376e-05, 'total_loss': 463.39495849609375, 'kl': 0.013063447549939156}, 'load_time_ms': 0.683, 'num_steps_sampled': 186000, 'update_time_ms': 2.576}",155,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.25018787384033,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,186000,186000,{},155,28,-96.02294955272997,2025-09-04_17-58-20,6.000153967687247,3651948,1757001500,-74.1287173082405,6330.8898866176605,3908,42.73
+cda-server-2,False,6365.358276844025,"{'sample_time_ms': 37712.506, 'num_steps_trained': 187200, 'grad_time_ms': 368.12, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 447.3106384277344, 'policy_loss': -0.1546049267053604, 'vf_explained_var': 0.0173814557492733, 'entropy': 13.743790626525879, 'cur_lr': 4.999999873689376e-05, 'total_loss': 447.1781311035156, 'kl': 0.014562149532139301}, 'load_time_ms': 0.677, 'num_steps_sampled': 187200, 'update_time_ms': 2.598}",156,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.468390226364136,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,187200,187200,{},156,29,-96.02294955272997,2025-09-04_17-58-55,6.000153967687247,3651948,1757001535,-73.961188285357,6365.358276844025,3937,42.69
+cda-server-2,False,6398.991594314575,"{'sample_time_ms': 37090.133, 'num_steps_trained': 188400, 'grad_time_ms': 368.892, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 432.24102783203125, 'policy_loss': -0.16239456832408905, 'vf_explained_var': 0.02992052584886551, 'entropy': 13.820674896240234, 'cur_lr': 4.999999873689376e-05, 'total_loss': 432.09716796875, 'kl': 0.012198535725474358}, 'load_time_ms': 0.68, 'num_steps_sampled': 188400, 'update_time_ms': 2.579}",157,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.63331747055054,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,188400,188400,{},157,28,-94.76394572856147,2025-09-04_17-59-28,6.000153967687247,3651948,1757001568,-73.47760643679179,6398.991594314575,3965,42.53
+cda-server-2,False,6432.306711435318,"{'sample_time_ms': 36440.832, 'num_steps_trained': 189600, 'grad_time_ms': 369.403, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 460.80865478515625, 'policy_loss': -0.15355131030082703, 'vf_explained_var': 0.014817522838711739, 'entropy': 13.547548294067383, 'cur_lr': 4.999999873689376e-05, 'total_loss': 460.6759948730469, 'kl': 0.013771760277450085}, 'load_time_ms': 0.688, 'num_steps_sampled': 189600, 'update_time_ms': 2.583}",158,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.3151171207428,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,189600,189600,{},158,27,-94.76394572856147,2025-09-04_18-00-02,4.000566881068873,3651948,1757001602,-74.17728514692062,6432.306711435318,3992,42.67
+cda-server-2,False,6465.709766387939,"{'sample_time_ms': 35739.848, 'num_steps_trained': 190800, 'grad_time_ms': 371.059, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 503.6580810546875, 'policy_loss': -0.15453101694583893, 'vf_explained_var': 0.01870148628950119, 'entropy': 13.329124450683594, 'cur_lr': 4.999999873689376e-05, 'total_loss': 503.5256042480469, 'kl': 0.014533232897520065}, 'load_time_ms': 0.693, 'num_steps_sampled': 190800, 'update_time_ms': 2.617}",159,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.40305495262146,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,190800,190800,{},159,31,-95.1787811615368,2025-09-04_18-00-35,4.000566881068873,3651948,1757001635,-73.62973317615167,6465.709766387939,4023,42.36
+cda-server-2,False,6499.890940904617,"{'sample_time_ms': 35151.71, 'num_steps_trained': 192000, 'grad_time_ms': 370.927, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 504.2874450683594, 'policy_loss': -0.16084225475788116, 'vf_explained_var': 0.008933212608098984, 'entropy': 13.373307228088379, 'cur_lr': 4.999999873689376e-05, 'total_loss': 504.1483154296875, 'kl': 0.014286703430116177}, 'load_time_ms': 0.698, 'num_steps_sampled': 192000, 'update_time_ms': 2.6}",160,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.181174516677856,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,192000,192000,{},160,28,-95.1787811615368,2025-09-04_18-01-09,4.000566881068873,3651948,1757001669,-71.18900937259428,6499.890940904617,4051,41.31
+cda-server-2,False,6533.306238651276,"{'sample_time_ms': 34417.313, 'num_steps_trained': 193200, 'grad_time_ms': 372.291, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 443.4478759765625, 'policy_loss': -0.14680147171020508, 'vf_explained_var': 0.030300889164209366, 'entropy': 13.579971313476562, 'cur_lr': 4.999999873689376e-05, 'total_loss': 443.3230285644531, 'kl': 0.014476616866886616}, 'load_time_ms': 0.704, 'num_steps_sampled': 193200, 'update_time_ms': 2.579}",161,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.415297746658325,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,193200,193200,{},161,29,-95.1787811615368,2025-09-04_18-01-43,5.039762891774004,3651948,1757001703,-69.76854035672953,6533.306238651276,4080,40.67
+cda-server-2,False,6566.520789146423,"{'sample_time_ms': 33708.97, 'num_steps_trained': 194400, 'grad_time_ms': 373.746, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 498.4832458496094, 'policy_loss': -0.15090115368366241, 'vf_explained_var': 0.02782423608005047, 'entropy': 13.286617279052734, 'cur_lr': 4.999999873689376e-05, 'total_loss': 498.3533020019531, 'kl': 0.013792970217764378}, 'load_time_ms': 0.731, 'num_steps_sampled': 194400, 'update_time_ms': 2.501}",162,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.214550495147705,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,194400,194400,{},162,27,-95.11817286038946,2025-09-04_18-02-16,5.039762891774004,3651948,1757001736,-72.1282965410242,6566.520789146423,4107,41.9
+cda-server-2,False,6600.906383752823,"{'sample_time_ms': 33337.011, 'num_steps_trained': 195600, 'grad_time_ms': 375.233, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 498.46148681640625, 'policy_loss': -0.15607160329818726, 'vf_explained_var': 0.01455751433968544, 'entropy': 13.587542533874512, 'cur_lr': 4.999999873689376e-05, 'total_loss': 498.3251953125, 'kl': 0.012981893494725227}, 'load_time_ms': 0.73, 'num_steps_sampled': 195600, 'update_time_ms': 2.558}",163,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.385594606399536,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,195600,195600,{},163,30,-94.88398095419217,2025-09-04_18-02-50,5.039762891774004,3651948,1757001770,-72.87153451901149,6600.906383752823,4137,42.22
+cda-server-2,False,6634.513481616974,"{'sample_time_ms': 33304.768, 'num_steps_trained': 196800, 'grad_time_ms': 374.63, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 469.6560363769531, 'policy_loss': -0.15397673845291138, 'vf_explained_var': 0.018685288727283478, 'entropy': 13.264029502868652, 'cur_lr': 4.999999873689376e-05, 'total_loss': 469.5223388671875, 'kl': 0.013345572166144848}, 'load_time_ms': 0.728, 'num_steps_sampled': 196800, 'update_time_ms': 2.584}",164,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.607097864151,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,196800,196800,{},164,27,-96.75490613689337,2025-09-04_18-03-24,4.000333877647177,3651948,1757001804,-74.19180616935152,6634.513481616974,4164,42.63
+cda-server-2,False,6667.584945678711,"{'sample_time_ms': 33289.103, 'num_steps_trained': 198000, 'grad_time_ms': 372.48, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 494.9625549316406, 'policy_loss': -0.15671184659004211, 'vf_explained_var': 0.012010018341243267, 'entropy': 13.140623092651367, 'cur_lr': 4.999999873689376e-05, 'total_loss': 494.8278503417969, 'kl': 0.014488577842712402}, 'load_time_ms': 0.71, 'num_steps_sampled': 198000, 'update_time_ms': 2.588}",165,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.07146406173706,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,198000,198000,{},165,28,-96.75490613689337,2025-09-04_18-03-57,4.000333877647177,3651948,1757001837,-73.97899578124846,6667.584945678711,4192,42.41
+cda-server-2,False,6700.747930765152,"{'sample_time_ms': 33159.297, 'num_steps_trained': 199200, 'grad_time_ms': 371.752, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 479.74163818359375, 'policy_loss': -0.16004471480846405, 'vf_explained_var': 0.022694991901516914, 'entropy': 13.23586368560791, 'cur_lr': 4.999999873689376e-05, 'total_loss': 479.6025390625, 'kl': 0.01381033007055521}, 'load_time_ms': 0.719, 'num_steps_sampled': 199200, 'update_time_ms': 2.576}",166,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.16298508644104,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,199200,199200,{},166,27,-96.75490613689337,2025-09-04_18-04-30,4.000038700747987,3651948,1757001870,-74.60556796545686,6700.747930765152,4219,42.7
+cda-server-2,False,6734.210085391998,"{'sample_time_ms': 33141.034, 'num_steps_trained': 200400, 'grad_time_ms': 372.896, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 481.43072509765625, 'policy_loss': -0.16376127302646637, 'vf_explained_var': 0.02091793902218342, 'entropy': 12.862247467041016, 'cur_lr': 4.999999873689376e-05, 'total_loss': 481.28900146484375, 'kl': 0.014525890350341797}, 'load_time_ms': 0.715, 'num_steps_sampled': 200400, 'update_time_ms': 2.6}",167,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.46215462684631,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,200400,200400,{},167,28,-96.75490613689337,2025-09-04_18-05-04,4.000213607189957,3651948,1757001904,-75.69941167396468,6734.210085391998,4247,43.31
+cda-server-2,False,6768.151931285858,"{'sample_time_ms': 33205.106, 'num_steps_trained': 201600, 'grad_time_ms': 371.545, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 431.3326721191406, 'policy_loss': -0.15331611037254333, 'vf_explained_var': 0.0383436493575573, 'entropy': 13.03227424621582, 'cur_lr': 4.999999873689376e-05, 'total_loss': 431.20166015625, 'kl': 0.014662904664874077}, 'load_time_ms': 0.701, 'num_steps_sampled': 201600, 'update_time_ms': 2.606}",168,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.94184589385986,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,201600,201600,{},168,28,-96.37598652716197,2025-09-04_18-05-38,6.000130978520583,3651948,1757001938,-75.83625850182565,6768.151931285858,4275,43.39
+cda-server-2,False,6801.593436717987,"{'sample_time_ms': 33208.048, 'num_steps_trained': 202800, 'grad_time_ms': 372.423, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 445.61370849609375, 'policy_loss': -0.15942586958408356, 'vf_explained_var': 0.006676660850644112, 'entropy': 13.269512176513672, 'cur_lr': 4.999999873689376e-05, 'total_loss': 445.47357177734375, 'kl': 0.012679451145231724}, 'load_time_ms': 0.715, 'num_steps_sampled': 202800, 'update_time_ms': 2.582}",169,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.441505432128906,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,202800,202800,{},169,29,-96.63690140637001,2025-09-04_18-06-11,6.000130978520583,3651948,1757001971,-74.06123125540654,6801.593436717987,4304,42.65
+cda-server-2,False,6834.95436167717,"{'sample_time_ms': 33122.278, 'num_steps_trained': 204000, 'grad_time_ms': 376.1, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 462.88909912109375, 'policy_loss': -0.14820978045463562, 'vf_explained_var': 0.022156503051519394, 'entropy': 12.96584415435791, 'cur_lr': 4.999999873689376e-05, 'total_loss': 462.76019287109375, 'kl': 0.012699018232524395}, 'load_time_ms': 0.736, 'num_steps_sampled': 204000, 'update_time_ms': 2.628}",170,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.36092495918274,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,204000,204000,{},170,26,-96.63690140637001,2025-09-04_18-06-44,6.000130978520583,3651948,1757002004,-76.51463005896723,6834.95436167717,4330,43.76
+cda-server-2,False,6868.499571561813,"{'sample_time_ms': 33135.07, 'num_steps_trained': 205200, 'grad_time_ms': 376.271, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 477.64007568359375, 'policy_loss': -0.14878112077713013, 'vf_explained_var': 0.024289535358548164, 'entropy': 13.268444061279297, 'cur_lr': 4.999999873689376e-05, 'total_loss': 477.5120544433594, 'kl': 0.013624078594148159}, 'load_time_ms': 0.738, 'num_steps_sampled': 205200, 'update_time_ms': 2.64}",171,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.545209884643555,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,205200,205200,{},171,32,-96.63690140637001,2025-09-04_18-07-18,8.000069988583551,3651948,1757002038,-73.74854474290508,6868.499571561813,4362,42.39
+cda-server-2,False,6901.951131343842,"{'sample_time_ms': 33159.103, 'num_steps_trained': 206400, 'grad_time_ms': 376.037, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 530.1824951171875, 'policy_loss': -0.16158553957939148, 'vf_explained_var': 0.020052360370755196, 'entropy': 13.253538131713867, 'cur_lr': 4.999999873689376e-05, 'total_loss': 530.0426025390625, 'kl': 0.014295194298028946}, 'load_time_ms': 0.717, 'num_steps_sampled': 206400, 'update_time_ms': 2.634}",172,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.4515597820282,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,206400,206400,{},172,32,-95.13871117544228,2025-09-04_18-07-52,8.000069988583551,3651948,1757002072,-68.18772978706642,6901.951131343842,4394,39.92
+cda-server-2,False,6935.213408470154,"{'sample_time_ms': 33045.654, 'num_steps_trained': 207600, 'grad_time_ms': 377.213, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 471.45257568359375, 'policy_loss': -0.16813132166862488, 'vf_explained_var': 0.016370773315429688, 'entropy': 13.099279403686523, 'cur_lr': 4.999999873689376e-05, 'total_loss': 471.30706787109375, 'kl': 0.01488409098237753}, 'load_time_ms': 0.714, 'num_steps_sampled': 207600, 'update_time_ms': 2.573}",173,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.262277126312256,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,207600,207600,{},173,29,-95.06222590109232,2025-09-04_18-08-25,8.000069988583551,3651948,1757002105,-66.36500553024902,6935.213408470154,4423,39.06
+cda-server-2,False,6968.752385139465,"{'sample_time_ms': 33038.375, 'num_steps_trained': 208800, 'grad_time_ms': 377.726, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 489.8381042480469, 'policy_loss': -0.14878961443901062, 'vf_explained_var': 0.023384928703308105, 'entropy': 13.308280944824219, 'cur_lr': 4.999999873689376e-05, 'total_loss': 489.7087707519531, 'kl': 0.01280286256223917}, 'load_time_ms': 0.711, 'num_steps_sampled': 208800, 'update_time_ms': 2.573}",174,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.53897666931152,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,208800,208800,{},174,30,-93.55236840867342,2025-09-04_18-08-58,8.000109714939725,3651948,1757002138,-65.7865687842101,6968.752385139465,4453,38.8
+cda-server-2,False,7003.800618886948,"{'sample_time_ms': 33234.718, 'num_steps_trained': 210000, 'grad_time_ms': 379.053, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 441.1793518066406, 'policy_loss': -0.14626182615756989, 'vf_explained_var': 0.043350908905267715, 'entropy': 13.320549964904785, 'cur_lr': 4.999999873689376e-05, 'total_loss': 441.05364990234375, 'kl': 0.013550628907978535}, 'load_time_ms': 0.718, 'num_steps_sampled': 210000, 'update_time_ms': 2.562}",175,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",35.0482337474823,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,210000,210000,{},175,31,-93.82099905489598,2025-09-04_18-09-33,8.000109714939725,3651948,1757002173,-68.23131268739769,7003.800618886948,4484,40.02
+cda-server-2,False,7037.028426885605,"{'sample_time_ms': 33242.547, 'num_steps_trained': 211200, 'grad_time_ms': 377.756, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 464.3078308105469, 'policy_loss': -0.15784205496311188, 'vf_explained_var': 0.021135879680514336, 'entropy': 13.235689163208008, 'cur_lr': 4.999999873689376e-05, 'total_loss': 464.1710205078125, 'kl': 0.013846870511770248}, 'load_time_ms': 0.724, 'num_steps_sampled': 211200, 'update_time_ms': 2.543}",176,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.22780799865723,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,211200,211200,{},176,27,-93.82099905489598,2025-09-04_18-10-07,8.000109714939725,3651948,1757002207,-71.104299140994,7037.028426885605,4511,41.46
+cda-server-2,False,7070.750869989395,"{'sample_time_ms': 33268.921, 'num_steps_trained': 212400, 'grad_time_ms': 377.48, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 520.8988647460938, 'policy_loss': -0.14671167731285095, 'vf_explained_var': 0.02725188620388508, 'entropy': 13.615344047546387, 'cur_lr': 4.999999873689376e-05, 'total_loss': 520.772705078125, 'kl': 0.01349978893995285}, 'load_time_ms': 0.724, 'num_steps_sampled': 212400, 'update_time_ms': 2.5}",177,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.72244310379028,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,212400,212400,{},177,30,-94.34293914620837,2025-09-04_18-10-40,8.000109714939725,3651948,1757002240,-67.67515993134072,7070.750869989395,4541,40.06
+cda-server-2,False,7105.745890855789,"{'sample_time_ms': 33374.45, 'num_steps_trained': 213600, 'grad_time_ms': 377.206, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 487.3653869628906, 'policy_loss': -0.1492527723312378, 'vf_explained_var': 0.019449617713689804, 'entropy': 13.086959838867188, 'cur_lr': 4.999999873689376e-05, 'total_loss': 487.2371520996094, 'kl': 0.013814833015203476}, 'load_time_ms': 0.728, 'num_steps_sampled': 213600, 'update_time_ms': 2.473}",178,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.99502086639404,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,213600,213600,{},178,29,-94.34293914620837,2025-09-04_18-11-15,8.000000429594232,3651948,1757002275,-68.10825736939901,7105.745890855789,4570,40.38
+cda-server-2,False,7139.0679042339325,"{'sample_time_ms': 33363.25, 'num_steps_trained': 214800, 'grad_time_ms': 376.485, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 482.8249206542969, 'policy_loss': -0.16203825175762177, 'vf_explained_var': 0.027035892009735107, 'entropy': 12.97227954864502, 'cur_lr': 4.999999873689376e-05, 'total_loss': 482.6842346191406, 'kl': 0.014075911603868008}, 'load_time_ms': 0.711, 'num_steps_sampled': 214800, 'update_time_ms': 2.498}",179,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.32201337814331,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,214800,214800,{},179,27,-95.53219191490898,2025-09-04_18-11-49,6.00005790227189,3651948,1757002309,-71.26538427002218,7139.0679042339325,4597,41.74
+cda-server-2,False,7172.459059238434,"{'sample_time_ms': 33369.89, 'num_steps_trained': 216000, 'grad_time_ms': 372.917, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 475.3916015625, 'policy_loss': -0.16252401471138, 'vf_explained_var': 0.03283761069178581, 'entropy': 12.606663703918457, 'cur_lr': 4.999999873689376e-05, 'total_loss': 475.2503356933594, 'kl': 0.014003436081111431}, 'load_time_ms': 0.684, 'num_steps_sampled': 216000, 'update_time_ms': 2.474}",180,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.39115500450134,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,216000,216000,{},180,29,-95.53219191490898,2025-09-04_18-12-22,6.00005790227189,3651948,1757002342,-73.30657166827612,7172.459059238434,4626,42.73
+cda-server-2,False,7206.650817155838,"{'sample_time_ms': 33436.797, 'num_steps_trained': 217200, 'grad_time_ms': 370.619, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 506.8238525390625, 'policy_loss': -0.1711304485797882, 'vf_explained_var': 0.027346935123205185, 'entropy': 12.956379890441895, 'cur_lr': 4.999999873689376e-05, 'total_loss': 506.6749572753906, 'kl': 0.01460947748273611}, 'load_time_ms': 0.685, 'num_steps_sampled': 217200, 'update_time_ms': 2.53}",181,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.191757917404175,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,217200,217200,{},181,31,-95.53219191490898,2025-09-04_18-12-56,8.000000440074153,3651948,1757002376,-70.12293610759806,7206.650817155838,4657,41.11
+cda-server-2,False,7240.108816862106,"{'sample_time_ms': 33437.607, 'num_steps_trained': 218400, 'grad_time_ms': 370.449, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 408.64678955078125, 'policy_loss': -0.15670305490493774, 'vf_explained_var': 0.03384535014629364, 'entropy': 13.208443641662598, 'cur_lr': 4.999999873689376e-05, 'total_loss': 408.5120544433594, 'kl': 0.014430741779506207}, 'load_time_ms': 0.684, 'num_steps_sampled': 218400, 'update_time_ms': 2.524}",182,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.45799970626831,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,218400,218400,{},182,27,-94.45431820690045,2025-09-04_18-13-30,8.000000440074153,3651948,1757002410,-70.88982028946353,7240.108816862106,4684,41.66
+cda-server-2,False,7273.374994516373,"{'sample_time_ms': 33439.775, 'num_steps_trained': 219600, 'grad_time_ms': 368.62, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 541.272705078125, 'policy_loss': -0.1531658172607422, 'vf_explained_var': 0.011464131996035576, 'entropy': 12.912820816040039, 'cur_lr': 4.999999873689376e-05, 'total_loss': 541.1414184570312, 'kl': 0.014355059713125229}, 'load_time_ms': 0.685, 'num_steps_sampled': 219600, 'update_time_ms': 2.562}",183,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.26617765426636,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,219600,219600,{},183,28,-95.21932780078414,2025-09-04_18-14-03,8.000000440074153,3651948,1757002443,-71.28978875642596,7273.374994516373,4712,41.93
+cda-server-2,False,7307.477123260498,"{'sample_time_ms': 33497.021, 'num_steps_trained': 220800, 'grad_time_ms': 367.646, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 559.41015625, 'policy_loss': -0.1447771191596985, 'vf_explained_var': 0.022654525935649872, 'entropy': 12.68217945098877, 'cur_lr': 4.999999873689376e-05, 'total_loss': 559.286865234375, 'kl': 0.014179195277392864}, 'load_time_ms': 0.669, 'num_steps_sampled': 220800, 'update_time_ms': 2.602}",184,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.102128744125366,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,220800,220800,{},184,32,-95.21932780078414,2025-09-04_18-14-37,6.0000256872259685,3651948,1757002477,-68.85681651830801,7307.477123260498,4744,40.73
+cda-server-2,False,7341.242619752884,"{'sample_time_ms': 33368.626, 'num_steps_trained': 222000, 'grad_time_ms': 367.768, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 474.8887939453125, 'policy_loss': -0.17067062854766846, 'vf_explained_var': 0.029077045619487762, 'entropy': 12.845396041870117, 'cur_lr': 4.999999873689376e-05, 'total_loss': 474.73822021484375, 'kl': 0.013275043107569218}, 'load_time_ms': 0.665, 'num_steps_sampled': 222000, 'update_time_ms': 2.562}",185,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.765496492385864,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,222000,222000,{},185,31,-95.21932780078414,2025-09-04_18-15-11,6.0000256872259685,3651948,1757002511,-68.1395418292209,7341.242619752884,4775,40.19
+cda-server-2,False,7376.1856777668,"{'sample_time_ms': 33539.186, 'num_steps_trained': 223200, 'grad_time_ms': 368.711, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 506.6676940917969, 'policy_loss': -0.16878332197666168, 'vf_explained_var': 0.018831439316272736, 'entropy': 12.581832885742188, 'cur_lr': 4.999999873689376e-05, 'total_loss': 506.5218200683594, 'kl': 0.015099719166755676}, 'load_time_ms': 0.655, 'num_steps_sampled': 223200, 'update_time_ms': 2.574}",186,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.943058013916016,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,223200,223200,{},186,27,-95.83189376358192,2025-09-04_18-15-46,6.000001728989278,3651948,1757002546,-69.54346150398968,7376.1856777668,4802,40.66
+cda-server-2,False,7409.781413793564,"{'sample_time_ms': 33527.805, 'num_steps_trained': 224400, 'grad_time_ms': 367.409, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 461.66259765625, 'policy_loss': -0.15208211541175842, 'vf_explained_var': 0.024997631087899208, 'entropy': 13.037174224853516, 'cur_lr': 4.999999873689376e-05, 'total_loss': 461.5320129394531, 'kl': 0.014166755601763725}, 'load_time_ms': 0.661, 'num_steps_sampled': 224400, 'update_time_ms': 2.574}",187,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.595736026763916,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,224400,224400,{},187,32,-95.83189376358192,2025-09-04_18-16-20,2.0001916476801034,3651948,1757002580,-70.26994574194738,7409.781413793564,4834,40.89
+cda-server-2,False,7443.122005939484,"{'sample_time_ms': 33364.201, 'num_steps_trained': 225600, 'grad_time_ms': 365.665, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 431.95404052734375, 'policy_loss': -0.16554684937000275, 'vf_explained_var': 0.03322778642177582, 'entropy': 12.807843208312988, 'cur_lr': 4.999999873689376e-05, 'total_loss': 431.8093566894531, 'kl': 0.013717424124479294}, 'load_time_ms': 0.659, 'num_steps_sampled': 225600, 'update_time_ms': 2.571}",188,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.3405921459198,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,225600,225600,{},188,26,-95.83189376358192,2025-09-04_18-16-53,1.7789538112237562,3651948,1757002613,-70.59327772699594,7443.122005939484,4860,41.15
+cda-server-2,False,7476.492438316345,"{'sample_time_ms': 33368.417, 'num_steps_trained': 226800, 'grad_time_ms': 366.289, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 455.0348205566406, 'policy_loss': -0.15835967659950256, 'vf_explained_var': 0.03563562408089638, 'entropy': 12.52796745300293, 'cur_lr': 4.999999873689376e-05, 'total_loss': 454.89776611328125, 'kl': 0.014046341180801392}, 'load_time_ms': 0.663, 'num_steps_sampled': 226800, 'update_time_ms': 2.535}",189,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.37043237686157,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,226800,226800,{},189,28,-95.40900359037315,2025-09-04_18-17-26,1.7789538112237562,3651948,1757002646,-72.52180366277011,7476.492438316345,4888,42.0
+cda-server-2,False,7509.6339473724365,"{'sample_time_ms': 33340.973, 'num_steps_trained': 228000, 'grad_time_ms': 368.748, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 447.1451721191406, 'policy_loss': -0.1697927862405777, 'vf_explained_var': 0.04534539952874184, 'entropy': 12.65049934387207, 'cur_lr': 4.999999873689376e-05, 'total_loss': 446.9970397949219, 'kl': 0.014240365475416183}, 'load_time_ms': 0.672, 'num_steps_sampled': 228000, 'update_time_ms': 2.524}",190,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.14150905609131,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,228000,228000,{},190,29,-93.99593714288171,2025-09-04_18-18-00,4.000255539698237,3651948,1757002680,-73.6454396869818,7509.6339473724365,4917,42.72
+cda-server-2,False,7543.576703071594,"{'sample_time_ms': 33313.244, 'num_steps_trained': 229200, 'grad_time_ms': 371.637, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 435.88616943359375, 'policy_loss': -0.16152003407478333, 'vf_explained_var': 0.028656788170337677, 'entropy': 12.934611320495605, 'cur_lr': 4.999999873689376e-05, 'total_loss': 435.74530029296875, 'kl': 0.013617919757962227}, 'load_time_ms': 0.675, 'num_steps_sampled': 229200, 'update_time_ms': 2.463}",191,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.942755699157715,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,229200,229200,{},191,28,-93.99593714288171,2025-09-04_18-18-34,4.000257012599587,3651948,1757002714,-73.94141473983446,7543.576703071594,4945,43.03
+cda-server-2,False,7578.093836784363,"{'sample_time_ms': 33416.772, 'num_steps_trained': 230400, 'grad_time_ms': 373.977, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 457.99609375, 'policy_loss': -0.16294731199741364, 'vf_explained_var': 0.02682061307132244, 'entropy': 13.026744842529297, 'cur_lr': 4.999999873689376e-05, 'total_loss': 457.8548278808594, 'kl': 0.01424330659210682}, 'load_time_ms': 0.682, 'num_steps_sampled': 230400, 'update_time_ms': 2.478}",192,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.517133712768555,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,230400,230400,{},192,27,-95.5472888620737,2025-09-04_18-19-08,4.000257012599587,3651948,1757002748,-73.16517307924585,7578.093836784363,4972,42.85
+cda-server-2,False,7611.424062490463,"{'sample_time_ms': 33420.426, 'num_steps_trained': 231600, 'grad_time_ms': 376.692, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 509.8204345703125, 'policy_loss': -0.16708210110664368, 'vf_explained_var': 0.019589563831686974, 'entropy': 12.410161972045898, 'cur_lr': 4.999999873689376e-05, 'total_loss': 509.67681884765625, 'kl': 0.015443297103047371}, 'load_time_ms': 0.701, 'num_steps_sampled': 231600, 'update_time_ms': 2.48}",193,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.330225706100464,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,231600,231600,{},193,29,-95.5472888620737,2025-09-04_18-19-41,4.000257012599587,3651948,1757002781,-72.8662915684726,7611.424062490463,5001,42.7
+cda-server-2,False,7644.909424304962,"{'sample_time_ms': 33358.604, 'num_steps_trained': 232800, 'grad_time_ms': 376.924, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 483.5562744140625, 'policy_loss': -0.15306442975997925, 'vf_explained_var': 0.02964412420988083, 'entropy': 12.852642059326172, 'cur_lr': 4.999999873689376e-05, 'total_loss': 483.4244384765625, 'kl': 0.013986443169414997}, 'load_time_ms': 0.712, 'num_steps_sampled': 232800, 'update_time_ms': 2.419}",194,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.4853618144989,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,232800,232800,{},194,24,-95.5472888620737,2025-09-04_18-20-15,4.000017886379702,3651948,1757002815,-76.33770108549413,7644.909424304962,5025,44.22
+cda-server-2,False,7679.199877500534,"{'sample_time_ms': 33411.757, 'num_steps_trained': 234000, 'grad_time_ms': 376.213, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 506.1232604980469, 'policy_loss': -0.1687338948249817, 'vf_explained_var': 0.03247503936290741, 'entropy': 12.751094818115234, 'cur_lr': 4.999999873689376e-05, 'total_loss': 505.9776916503906, 'kl': 0.015258345752954483}, 'load_time_ms': 0.708, 'num_steps_sampled': 234000, 'update_time_ms': 2.455}",195,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.2904531955719,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,234000,234000,{},195,31,-96.21196756648438,2025-09-04_18-20-49,4.000017886379702,3651948,1757002849,-74.40327719050362,7679.199877500534,5056,43.19
+cda-server-2,False,7713.31763625145,"{'sample_time_ms': 33330.199, 'num_steps_trained': 235200, 'grad_time_ms': 375.18, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 442.1346435546875, 'policy_loss': -0.16684409976005554, 'vf_explained_var': 0.031153075397014618, 'entropy': 12.824676513671875, 'cur_lr': 4.999999873689376e-05, 'total_loss': 441.98858642578125, 'kl': 0.013674840331077576}, 'load_time_ms': 0.716, 'num_steps_sampled': 235200, 'update_time_ms': 2.505}",196,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.11775875091553,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,235200,235200,{},196,30,-96.21196756648438,2025-09-04_18-21-23,6.002070167660171,3651948,1757002883,-72.27055099438103,7713.31763625145,5086,42.12
+cda-server-2,False,7746.8098311424255,"{'sample_time_ms': 33318.688, 'num_steps_trained': 236400, 'grad_time_ms': 376.25, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 504.9436340332031, 'policy_loss': -0.1752660572528839, 'vf_explained_var': 0.052433982491493225, 'entropy': 12.876564979553223, 'cur_lr': 4.999999873689376e-05, 'total_loss': 504.7928161621094, 'kl': 0.01613185554742813}, 'load_time_ms': 0.715, 'num_steps_sampled': 236400, 'update_time_ms': 2.528}",197,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.49219489097595,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,236400,236400,{},197,34,-96.21196756648438,2025-09-04_18-21-57,8.000242692043646,3651948,1757002917,-65.35815674165814,7746.8098311424255,5120,39.21
+cda-server-2,False,7780.365607500076,"{'sample_time_ms': 33339.802, 'num_steps_trained': 237600, 'grad_time_ms': 376.558, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 416.38665771484375, 'policy_loss': -0.1583482027053833, 'vf_explained_var': 0.008243918418884277, 'entropy': 13.000116348266602, 'cur_lr': 4.999999873689376e-05, 'total_loss': 416.25006103515625, 'kl': 0.01429493073374033}, 'load_time_ms': 0.714, 'num_steps_sampled': 237600, 'update_time_ms': 2.613}",198,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.55577635765076,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,237600,237600,{},198,25,-96.21196756648438,2025-09-04_18-22-30,8.000242692043646,3651948,1757002950,-68.68605460087782,7780.365607500076,5145,41.06
+cda-server-2,False,7813.889029741287,"{'sample_time_ms': 33355.593, 'num_steps_trained': 238800, 'grad_time_ms': 376.104, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 456.09765625, 'policy_loss': -0.15889257192611694, 'vf_explained_var': 0.02178768254816532, 'entropy': 12.65239429473877, 'cur_lr': 4.999999873689376e-05, 'total_loss': 455.9606628417969, 'kl': 0.014413093216717243}, 'load_time_ms': 0.714, 'num_steps_sampled': 238800, 'update_time_ms': 2.603}",199,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.52342224121094,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,238800,238800,{},199,27,-95.60804949833211,2025-09-04_18-23-04,8.000242692043646,3651948,1757002984,-70.32519611813332,7813.889029741287,5172,41.79
+cda-server-2,False,7846.953207492828,"{'sample_time_ms': 33347.681, 'num_steps_trained': 240000, 'grad_time_ms': 376.3, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 485.8211669921875, 'policy_loss': -0.1690763682126999, 'vf_explained_var': 0.02998235449194908, 'entropy': 12.54977798461914, 'cur_lr': 4.999999873689376e-05, 'total_loss': 485.6736755371094, 'kl': 0.014191006310284138}, 'load_time_ms': 0.703, 'num_steps_sampled': 240000, 'update_time_ms': 2.589}",200,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.06417775154114,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,240000,240000,{},200,30,-95.60804949833211,2025-09-04_18-23-37,1.0952821156691535,3651948,1757003017,-69.21935034442157,7846.953207492828,5202,41.4
+cda-server-2,False,7881.645069122314,"{'sample_time_ms': 33423.48, 'num_steps_trained': 241200, 'grad_time_ms': 375.39, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 438.78692626953125, 'policy_loss': -0.16150593757629395, 'vf_explained_var': 0.02985469438135624, 'entropy': 12.441953659057617, 'cur_lr': 4.999999873689376e-05, 'total_loss': 438.64642333984375, 'kl': 0.013840895146131516}, 'load_time_ms': 0.694, 'num_steps_sampled': 241200, 'update_time_ms': 2.612}",201,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.691861629486084,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,241200,241200,{},201,28,-95.60804949833211,2025-09-04_18-24-12,0.0010491070470486363,3651948,1757003052,-72.67710039707534,7881.645069122314,5230,43.11
+cda-server-2,False,7915.277950763702,"{'sample_time_ms': 33336.976, 'num_steps_trained': 242400, 'grad_time_ms': 373.51, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 528.7056274414062, 'policy_loss': -0.164224773645401, 'vf_explained_var': 0.013893438503146172, 'entropy': 12.260029792785645, 'cur_lr': 4.999999873689376e-05, 'total_loss': 528.563232421875, 'kl': 0.014393393881618977}, 'load_time_ms': 0.692, 'num_steps_sampled': 242400, 'update_time_ms': 2.605}",202,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.63288164138794,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,242400,242400,{},202,34,-93.56909818892126,2025-09-04_18-24-46,8.000011402220146,3651948,1757003086,-65.56545058732408,7915.277950763702,5264,39.49
+cda-server-2,False,7948.64150595665,"{'sample_time_ms': 33341.681, 'num_steps_trained': 243600, 'grad_time_ms': 372.193, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 504.7474365234375, 'policy_loss': -0.14668802917003632, 'vf_explained_var': 0.030628588050603867, 'entropy': 12.595661163330078, 'cur_lr': 4.999999873689376e-05, 'total_loss': 504.6220703125, 'kl': 0.014047231525182724}, 'load_time_ms': 0.673, 'num_steps_sampled': 243600, 'update_time_ms': 2.623}",203,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.36355519294739,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,243600,243600,{},203,29,-93.56909818892126,2025-09-04_18-25-19,8.000011402220146,3651948,1757003119,-65.31403438116699,7948.64150595665,5293,39.22
+cda-server-2,False,7982.057063341141,"{'sample_time_ms': 33334.772, 'num_steps_trained': 244800, 'grad_time_ms': 372.076, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 450.298583984375, 'policy_loss': -0.16483943164348602, 'vf_explained_var': 0.06417058408260345, 'entropy': 12.538618087768555, 'cur_lr': 4.999999873689376e-05, 'total_loss': 450.15496826171875, 'kl': 0.013958992436528206}, 'load_time_ms': 0.67, 'num_steps_sampled': 244800, 'update_time_ms': 2.644}",204,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.41555738449097,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,244800,244800,{},204,33,-93.19348353972086,2025-09-04_18-25-52,8.000032462470926,3651948,1757003152,-63.282461117190714,7982.057063341141,5326,38.18
+cda-server-2,False,8015.771010875702,"{'sample_time_ms': 33278.228, 'num_steps_trained': 246000, 'grad_time_ms': 371.012, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 446.9205322265625, 'policy_loss': -0.15768620371818542, 'vf_explained_var': 0.036962032318115234, 'entropy': 12.585735321044922, 'cur_lr': 4.999999873689376e-05, 'total_loss': 446.7854309082031, 'kl': 0.01488898042589426}, 'load_time_ms': 0.671, 'num_steps_sampled': 246000, 'update_time_ms': 2.653}",205,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.71394753456116,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,246000,246000,{},205,28,-93.63480907981032,2025-09-04_18-26-26,8.000032462470926,3651948,1757003186,-65.35059535319327,8015.771010875702,5354,39.43
+cda-server-2,False,8049.1763389110565,"{'sample_time_ms': 33207.42, 'num_steps_trained': 247200, 'grad_time_ms': 370.664, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 475.2734375, 'policy_loss': -0.15967623889446259, 'vf_explained_var': 0.033230237662792206, 'entropy': 12.491169929504395, 'cur_lr': 4.999999873689376e-05, 'total_loss': 475.1365966796875, 'kl': 0.015001079998910427}, 'load_time_ms': 0.659, 'num_steps_sampled': 247200, 'update_time_ms': 2.602}",206,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.405328035354614,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,247200,247200,{},206,29,-95.0799406703004,2025-09-04_18-26-59,8.000032462470926,3651948,1757003219,-67.66203927731692,8049.1763389110565,5383,40.71
+cda-server-2,False,8083.296813249588,"{'sample_time_ms': 33269.992, 'num_steps_trained': 248400, 'grad_time_ms': 370.995, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 446.5125427246094, 'policy_loss': -0.17522785067558289, 'vf_explained_var': 0.03150990977883339, 'entropy': 12.790533065795898, 'cur_lr': 4.999999873689376e-05, 'total_loss': 446.3612060546875, 'kl': 0.015740180388092995}, 'load_time_ms': 0.67, 'num_steps_sampled': 248400, 'update_time_ms': 2.575}",207,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.120474338531494,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,248400,248400,{},207,29,-95.0799406703004,2025-09-04_18-27-34,8.000032462470926,3651948,1757003254,-68.76872612698419,8083.296813249588,5412,41.03
+cda-server-2,False,8116.955354452133,"{'sample_time_ms': 33280.061, 'num_steps_trained': 249600, 'grad_time_ms': 371.251, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 547.6825561523438, 'policy_loss': -0.1566251516342163, 'vf_explained_var': 0.02517450600862503, 'entropy': 12.358968734741211, 'cur_lr': 4.999999873689376e-05, 'total_loss': 547.5484008789062, 'kl': 0.014785553328692913}, 'load_time_ms': 0.675, 'num_steps_sampled': 249600, 'update_time_ms': 2.515}",208,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.658541202545166,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,249600,249600,{},208,29,-95.0799406703004,2025-09-04_18-28-07,4.000291038650434,3651948,1757003287,-68.98315664091089,8116.955354452133,5441,41.1
+cda-server-2,False,8151.842911720276,"{'sample_time_ms': 33417.949, 'num_steps_trained': 250800, 'grad_time_ms': 369.804, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 447.9426574707031, 'policy_loss': -0.16055484116077423, 'vf_explained_var': 0.016253961250185966, 'entropy': 12.735525131225586, 'cur_lr': 4.999999873689376e-05, 'total_loss': 447.80487060546875, 'kl': 0.015008926391601562}, 'load_time_ms': 0.671, 'num_steps_sampled': 250800, 'update_time_ms': 2.51}",209,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.8875572681427,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,250800,250800,{},209,28,-93.77040153773555,2025-09-04_18-28-42,4.000291038650434,3651948,1757003322,-70.58435784835198,8151.842911720276,5469,41.72
+cda-server-2,False,8185.894082307816,"{'sample_time_ms': 33518.732, 'num_steps_trained': 252000, 'grad_time_ms': 367.734, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 510.38629150390625, 'policy_loss': -0.16351114213466644, 'vf_explained_var': 0.025241592898964882, 'entropy': 12.427091598510742, 'cur_lr': 4.999999873689376e-05, 'total_loss': 510.2453308105469, 'kl': 0.014884104020893574}, 'load_time_ms': 0.687, 'num_steps_sampled': 252000, 'update_time_ms': 2.513}",210,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.05117058753967,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,252000,252000,{},210,33,-93.77040153773555,2025-09-04_18-29-16,6.000473203830543,3651948,1757003356,-68.06821898003427,8185.894082307816,5502,40.71
+cda-server-2,False,8219.247455835342,"{'sample_time_ms': 33386.554, 'num_steps_trained': 253200, 'grad_time_ms': 366.039, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 449.7999267578125, 'policy_loss': -0.1619112491607666, 'vf_explained_var': 0.03780033811926842, 'entropy': 12.094733238220215, 'cur_lr': 4.999999873689376e-05, 'total_loss': 449.66009521484375, 'kl': 0.014554371125996113}, 'load_time_ms': 0.687, 'num_steps_sampled': 253200, 'update_time_ms': 2.46}",211,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.353373527526855,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,253200,253200,{},211,32,-93.6279369839979,2025-09-04_18-29-50,8.000000667069283,3651948,1757003390,-65.20491987533221,8219.247455835342,5534,39.3
+cda-server-2,False,8252.84744977951,"{'sample_time_ms': 33385.07, 'num_steps_trained': 254400, 'grad_time_ms': 364.229, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 497.3307800292969, 'policy_loss': -0.16077612340450287, 'vf_explained_var': 0.035886500030756, 'entropy': 12.381339073181152, 'cur_lr': 4.999999873689376e-05, 'total_loss': 497.1937255859375, 'kl': 0.015604168176651001}, 'load_time_ms': 0.679, 'num_steps_sampled': 254400, 'update_time_ms': 2.486}",212,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.59999394416809,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,254400,254400,{},212,30,-93.6279369839979,2025-09-04_18-30-23,8.000000667069283,3651948,1757003423,-62.45875354009866,8252.84744977951,5564,38.05
+cda-server-2,False,8287.260428905487,"{'sample_time_ms': 33490.409, 'num_steps_trained': 255600, 'grad_time_ms': 363.844, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 512.883056640625, 'policy_loss': -0.17092293500900269, 'vf_explained_var': 0.027442097663879395, 'entropy': 12.155288696289062, 'cur_lr': 4.999999873689376e-05, 'total_loss': 512.7354736328125, 'kl': 0.015318612568080425}, 'load_time_ms': 0.677, 'num_steps_sampled': 255600, 'update_time_ms': 2.464}",213,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.41297912597656,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,255600,255600,{},213,31,-93.6279369839979,2025-09-04_18-30-58,8.000000667069283,3651948,1757003458,-61.96758689936211,8287.260428905487,5595,37.75
+cda-server-2,False,8320.60078382492,"{'sample_time_ms': 33482.986, 'num_steps_trained': 256800, 'grad_time_ms': 363.767, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 459.37249755859375, 'policy_loss': -0.16169892251491547, 'vf_explained_var': 0.02173599973320961, 'entropy': 12.374711036682129, 'cur_lr': 4.999999873689376e-05, 'total_loss': 459.2326354980469, 'kl': 0.014371867291629314}, 'load_time_ms': 0.677, 'num_steps_sampled': 256800, 'update_time_ms': 2.455}",214,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.340354919433594,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,256800,256800,{},214,33,-93.6279369839979,2025-09-04_18-31-31,6.000034402189836,3651948,1757003491,-63.38828660104511,8320.60078382492,5628,38.32
+cda-server-2,False,8354.387178182602,"{'sample_time_ms': 33488.327, 'num_steps_trained': 258000, 'grad_time_ms': 365.69, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 504.76953125, 'policy_loss': -0.1607033908367157, 'vf_explained_var': 0.014909658581018448, 'entropy': 12.613929748535156, 'cur_lr': 4.999999873689376e-05, 'total_loss': 504.6307373046875, 'kl': 0.014423470944166183}, 'load_time_ms': 0.691, 'num_steps_sampled': 258000, 'update_time_ms': 2.411}",215,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.786394357681274,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,258000,258000,{},215,32,-93.63772402806477,2025-09-04_18-32-05,6.000006885068439,3651948,1757003525,-62.11349040983944,8354.387178182602,5660,37.78
+cda-server-2,False,8388.01570558548,"{'sample_time_ms': 33508.098, 'num_steps_trained': 259200, 'grad_time_ms': 368.138, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 484.1788330078125, 'policy_loss': -0.16928161680698395, 'vf_explained_var': 0.028671972453594208, 'entropy': 12.11899471282959, 'cur_lr': 4.999999873689376e-05, 'total_loss': 484.0320129394531, 'kl': 0.01483107265084982}, 'load_time_ms': 0.699, 'num_steps_sampled': 259200, 'update_time_ms': 2.44}",216,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.62852740287781,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,259200,259200,{},216,30,-94.87738322979997,2025-09-04_18-32-39,6.000033280248075,3651948,1757003559,-61.03114630598341,8388.01570558548,5690,37.16
+cda-server-2,False,8421.57034111023,"{'sample_time_ms': 33450.914, 'num_steps_trained': 260400, 'grad_time_ms': 368.741, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 474.89111328125, 'policy_loss': -0.16506989300251007, 'vf_explained_var': 0.017899474129080772, 'entropy': 12.297295570373535, 'cur_lr': 4.999999873689376e-05, 'total_loss': 474.7478942871094, 'kl': 0.014392748475074768}, 'load_time_ms': 0.683, 'num_steps_sampled': 260400, 'update_time_ms': 2.465}",217,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.554635524749756,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,260400,260400,{},217,36,-94.87738322979997,2025-09-04_18-33-12,6.000033280248075,3651948,1757003592,-58.55471967135243,8421.57034111023,5726,36.21
+cda-server-2,False,8455.177167654037,"{'sample_time_ms': 33444.751, 'num_steps_trained': 261600, 'grad_time_ms': 369.745, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 457.5904846191406, 'policy_loss': -0.15295682847499847, 'vf_explained_var': 0.02529967576265335, 'entropy': 12.191746711730957, 'cur_lr': 4.999999873689376e-05, 'total_loss': 457.4587097167969, 'kl': 0.013913111761212349}, 'load_time_ms': 0.69, 'num_steps_sampled': 261600, 'update_time_ms': 2.452}",218,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.60682654380798,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,261600,261600,{},218,30,-94.87738322979997,2025-09-04_18-33-46,6.000033280248075,3651948,1757003626,-59.80524415553662,8455.177167654037,5756,36.98
+cda-server-2,False,8488.736039161682,"{'sample_time_ms': 33312.104, 'num_steps_trained': 262800, 'grad_time_ms': 369.461, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 502.29730224609375, 'policy_loss': -0.17624081671237946, 'vf_explained_var': 0.02473224513232708, 'entropy': 12.325740814208984, 'cur_lr': 4.999999873689376e-05, 'total_loss': 502.14306640625, 'kl': 0.01449984684586525}, 'load_time_ms': 0.687, 'num_steps_sampled': 262800, 'update_time_ms': 2.463}",219,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.55887150764465,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,262800,262800,{},219,32,-92.68477240724081,2025-09-04_18-34-19,6.000001307149937,3651948,1757003659,-58.989134541468914,8488.736039161682,5788,36.65
+cda-server-2,False,8522.322809696198,"{'sample_time_ms': 33263.505, 'num_steps_trained': 264000, 'grad_time_ms': 371.579, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 494.7518005371094, 'policy_loss': -0.17339009046554565, 'vf_explained_var': 0.019748859107494354, 'entropy': 11.994256973266602, 'cur_lr': 4.999999873689376e-05, 'total_loss': 494.6004638671875, 'kl': 0.014561583288013935}, 'load_time_ms': 0.681, 'num_steps_sampled': 264000, 'update_time_ms': 2.506}",220,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.58677053451538,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,264000,264000,{},220,33,-92.68477240724081,2025-09-04_18-34-53,6.000001307149937,3651948,1757003693,-61.583950956835054,8522.322809696198,5821,37.68
+cda-server-2,False,8556.517718076706,"{'sample_time_ms': 33347.762, 'num_steps_trained': 265200, 'grad_time_ms': 371.472, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 480.7444763183594, 'policy_loss': -0.17422175407409668, 'vf_explained_var': 0.03855053707957268, 'entropy': 12.05868911743164, 'cur_lr': 4.999999873689376e-05, 'total_loss': 480.5936279296875, 'kl': 0.015407336875796318}, 'load_time_ms': 0.685, 'num_steps_sampled': 265200, 'update_time_ms': 2.58}",221,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.19490838050842,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,265200,265200,{},221,34,-92.68477240724081,2025-09-04_18-35-27,6.000001307149937,3651948,1757003727,-58.61488066202075,8556.517718076706,5855,36.22
+cda-server-2,False,8590.381103038788,"{'sample_time_ms': 33373.788, 'num_steps_trained': 266400, 'grad_time_ms': 371.794, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 465.3026123046875, 'policy_loss': -0.16076049208641052, 'vf_explained_var': 0.013659258373081684, 'entropy': 12.263897895812988, 'cur_lr': 4.999999873689376e-05, 'total_loss': 465.1667175292969, 'kl': 0.016368364915251732}, 'load_time_ms': 0.695, 'num_steps_sampled': 266400, 'update_time_ms': 2.548}",222,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.86338496208191,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,266400,266400,{},222,34,-92.65719252126992,2025-09-04_18-36-01,8.000000506173045,3651948,1757003761,-57.24653802731454,8590.381103038788,5889,35.82
+cda-server-2,False,8624.816487312317,"{'sample_time_ms': 33374.658, 'num_steps_trained': 267600, 'grad_time_ms': 373.171, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 450.2386474609375, 'policy_loss': -0.17680014669895172, 'vf_explained_var': 0.023019777610898018, 'entropy': 11.894817352294922, 'cur_lr': 4.999999873689376e-05, 'total_loss': 450.0855407714844, 'kl': 0.015569154173135757}, 'load_time_ms': 0.705, 'num_steps_sampled': 267600, 'update_time_ms': 2.51}",223,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.43538427352905,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,267600,267600,{},223,34,-92.8331001949819,2025-09-04_18-36-36,8.000000506173045,3651948,1757003796,-56.41039121202906,8624.816487312317,5923,35.46
+cda-server-2,False,8657.887679338455,"{'sample_time_ms': 33349.505, 'num_steps_trained': 268800, 'grad_time_ms': 371.394, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 537.3810424804688, 'policy_loss': -0.16604220867156982, 'vf_explained_var': 0.01993529684841633, 'entropy': 11.660624504089355, 'cur_lr': 4.999999873689376e-05, 'total_loss': 537.23876953125, 'kl': 0.015647679567337036}, 'load_time_ms': 0.701, 'num_steps_sampled': 268800, 'update_time_ms': 2.556}",224,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.071192026138306,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,268800,268800,{},224,29,-93.81865513420475,2025-09-04_18-37-09,8.000000506173045,3651948,1757003829,-59.83576275009328,8657.887679338455,5952,36.98
+cda-server-2,False,8691.416977643967,"{'sample_time_ms': 33326.098, 'num_steps_trained': 270000, 'grad_time_ms': 369.077, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 508.33740234375, 'policy_loss': -0.16329897940158844, 'vf_explained_var': 0.02697630040347576, 'entropy': 12.403926849365234, 'cur_lr': 4.999999873689376e-05, 'total_loss': 508.1960754394531, 'kl': 0.014454166404902935}, 'load_time_ms': 0.695, 'num_steps_sampled': 270000, 'update_time_ms': 2.584}",225,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.529298305511475,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,270000,270000,{},225,31,-93.81865513420475,2025-09-04_18-37-42,8.000000406666924,3651948,1757003862,-62.80773614513645,8691.416977643967,5983,38.18
+cda-server-2,False,8725.05351448059,"{'sample_time_ms': 33329.061, 'num_steps_trained': 271200, 'grad_time_ms': 366.944, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 487.5612487792969, 'policy_loss': -0.16774672269821167, 'vf_explained_var': 0.01808946020901203, 'entropy': 12.464456558227539, 'cur_lr': 4.999999873689376e-05, 'total_loss': 487.41497802734375, 'kl': 0.014172756113111973}, 'load_time_ms': 0.682, 'num_steps_sampled': 271200, 'update_time_ms': 2.571}",226,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.636536836624146,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,271200,271200,{},226,30,-93.81865513420475,2025-09-04_18-38-16,8.000000433543274,3651948,1757003896,-64.92589373827938,8725.05351448059,6013,39.31
+cda-server-2,False,8758.123383283615,"{'sample_time_ms': 33281.457, 'num_steps_trained': 272400, 'grad_time_ms': 366.033, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 469.1771240234375, 'policy_loss': -0.17603828012943268, 'vf_explained_var': 0.030873127281665802, 'entropy': 11.629398345947266, 'cur_lr': 4.999999873689376e-05, 'total_loss': 469.0252380371094, 'kl': 0.015914278104901314}, 'load_time_ms': 0.684, 'num_steps_sampled': 272400, 'update_time_ms': 2.578}",227,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.06986880302429,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,272400,272400,{},227,32,-92.0961907308189,2025-09-04_18-38-49,8.000000433543274,3651948,1757003929,-65.39002211193822,8758.123383283615,6045,39.64
+cda-server-2,False,8791.669610738754,"{'sample_time_ms': 33276.525, 'num_steps_trained': 273600, 'grad_time_ms': 364.904, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 480.46771240234375, 'policy_loss': -0.15781445801258087, 'vf_explained_var': 0.02216174267232418, 'entropy': 11.83214282989502, 'cur_lr': 4.999999873689376e-05, 'total_loss': 480.3331298828125, 'kl': 0.015275244601070881}, 'load_time_ms': 0.674, 'num_steps_sampled': 273600, 'update_time_ms': 2.595}",228,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.54622745513916,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,273600,273600,{},228,33,-92.50693433778561,2025-09-04_18-39-22,8.000000433543274,3651948,1757003962,-62.27324965840894,8791.669610738754,6078,37.92
+cda-server-2,False,8825.53575849533,"{'sample_time_ms': 33305.359, 'num_steps_trained': 274800, 'grad_time_ms': 366.757, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 526.8289794921875, 'policy_loss': -0.16418081521987915, 'vf_explained_var': 0.01841430552303791, 'entropy': 11.820015907287598, 'cur_lr': 4.999999873689376e-05, 'total_loss': 526.6889038085938, 'kl': 0.015894444659352303}, 'load_time_ms': 0.681, 'num_steps_sampled': 274800, 'update_time_ms': 2.626}",229,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.86614775657654,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,274800,274800,{},229,33,-93.84268985082524,2025-09-04_18-39-56,6.0005564529203665,3651948,1757003996,-61.21300704792242,8825.53575849533,6111,37.23
+cda-server-2,False,8859.073387145996,"{'sample_time_ms': 33299.837, 'num_steps_trained': 276000, 'grad_time_ms': 367.377, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 518.92041015625, 'policy_loss': -0.16965606808662415, 'vf_explained_var': 0.027118226513266563, 'entropy': 11.502217292785645, 'cur_lr': 4.999999873689376e-05, 'total_loss': 518.77490234375, 'kl': 0.01592307724058628}, 'load_time_ms': 0.692, 'num_steps_sampled': 276000, 'update_time_ms': 2.589}",230,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.53762865066528,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,276000,276000,{},230,32,-93.84268985082524,2025-09-04_18-40-30,6.0005564529203665,3651948,1757004030,-59.24914097370995,8859.073387145996,6143,36.38
+cda-server-2,False,8892.482960700989,"{'sample_time_ms': 33219.391, 'num_steps_trained': 277200, 'grad_time_ms': 369.319, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 493.50482177734375, 'policy_loss': -0.17525163292884827, 'vf_explained_var': 0.026482833549380302, 'entropy': 11.950725555419922, 'cur_lr': 4.999999873689376e-05, 'total_loss': 493.3524169921875, 'kl': 0.01507889200001955}, 'load_time_ms': 0.695, 'num_steps_sampled': 277200, 'update_time_ms': 2.542}",231,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.409573554992676,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,277200,277200,{},231,35,-93.93805342725528,2025-09-04_18-41-03,6.000051157608556,3651948,1757004063,-58.93394690622315,8892.482960700989,6178,36.31
+cda-server-2,False,8926.56376671791,"{'sample_time_ms': 33239.182, 'num_steps_trained': 278400, 'grad_time_ms': 371.247, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 477.0397033691406, 'policy_loss': -0.1522827297449112, 'vf_explained_var': 0.024695463478565216, 'entropy': 12.114645004272461, 'cur_lr': 4.999999873689376e-05, 'total_loss': 476.9099426269531, 'kl': 0.014842814765870571}, 'load_time_ms': 0.7, 'num_steps_sampled': 278400, 'update_time_ms': 2.557}",232,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.080806016922,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,278400,278400,{},232,35,-93.93805342725528,2025-09-04_18-41-37,6.000051157608556,3651948,1757004097,-57.25040114733103,8926.56376671791,6213,35.59
+cda-server-2,False,8960.791088581085,"{'sample_time_ms': 33220.342, 'num_steps_trained': 279600, 'grad_time_ms': 369.251, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 463.5008544921875, 'policy_loss': -0.18232877552509308, 'vf_explained_var': 0.022623876109719276, 'entropy': 11.372271537780762, 'cur_lr': 4.999999873689376e-05, 'total_loss': 463.34088134765625, 'kl': 0.014735642820596695}, 'load_time_ms': 0.685, 'num_steps_sampled': 279600, 'update_time_ms': 2.589}",233,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.22732186317444,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,279600,279600,{},233,33,-93.93805342725528,2025-09-04_18-42-12,6.000025070402176,3651948,1757004132,-55.39362406723989,8960.791088581085,6246,34.64
+cda-server-2,False,8994.085668563843,"{'sample_time_ms': 33242.289, 'num_steps_trained': 280800, 'grad_time_ms': 369.641, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 465.8112487792969, 'policy_loss': -0.15602374076843262, 'vf_explained_var': 0.043435726314783096, 'entropy': 12.00288200378418, 'cur_lr': 4.999999873689376e-05, 'total_loss': 465.6771240234375, 'kl': 0.014424502849578857}, 'load_time_ms': 0.689, 'num_steps_sampled': 280800, 'update_time_ms': 2.565}",234,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.29457998275757,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,280800,280800,{},234,32,-93.07855688625773,2025-09-04_18-42-45,6.000025070402176,3651948,1757004165,-56.94350179907652,8994.085668563843,6278,35.68
+cda-server-2,False,9027.672051429749,"{'sample_time_ms': 33245.241, 'num_steps_trained': 282000, 'grad_time_ms': 372.427, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 537.2978515625, 'policy_loss': -0.16486559808254242, 'vf_explained_var': 0.02764366753399372, 'entropy': 11.935712814331055, 'cur_lr': 4.999999873689376e-05, 'total_loss': 537.1552124023438, 'kl': 0.01467643678188324}, 'load_time_ms': 0.694, 'num_steps_sampled': 282000, 'update_time_ms': 2.541}",235,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.58638286590576,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,282000,282000,{},235,30,-93.07855688625773,2025-09-04_18-43-19,6.000025070402176,3651948,1757004199,-58.26857565715705,9027.672051429749,6308,36.4
+cda-server-2,False,9061.819860935211,"{'sample_time_ms': 33297.025, 'num_steps_trained': 283200, 'grad_time_ms': 371.802, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 552.9977416992188, 'policy_loss': -0.15926803648471832, 'vf_explained_var': 0.023812182247638702, 'entropy': 11.88892650604248, 'cur_lr': 4.999999873689376e-05, 'total_loss': 552.8607177734375, 'kl': 0.014615191146731377}, 'load_time_ms': 0.695, 'num_steps_sampled': 283200, 'update_time_ms': 2.536}",236,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.14780950546265,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,283200,283200,{},236,33,-92.59710856730977,2025-09-04_18-43-53,8.000068323775915,3651948,1757004233,-61.44614542643216,9061.819860935211,6341,37.82
+cda-server-2,False,9095.312840461731,"{'sample_time_ms': 33338.48, 'num_steps_trained': 284400, 'grad_time_ms': 372.653, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 462.8885192871094, 'policy_loss': -0.18361049890518188, 'vf_explained_var': 0.02978028915822506, 'entropy': 11.811455726623535, 'cur_lr': 4.999999873689376e-05, 'total_loss': 462.72698974609375, 'kl': 0.014550920575857162}, 'load_time_ms': 0.698, 'num_steps_sampled': 284400, 'update_time_ms': 2.543}",237,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.492979526519775,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,284400,284400,{},237,31,-95.66316184995254,2025-09-04_18-44-26,8.000068323775915,3651948,1757004266,-63.84925295945575,9095.312840461731,6372,38.73
+cda-server-2,False,9128.5523583889,"{'sample_time_ms': 33307.224, 'num_steps_trained': 285600, 'grad_time_ms': 373.24, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 482.65777587890625, 'policy_loss': -0.16565656661987305, 'vf_explained_var': 0.0365450456738472, 'entropy': 12.015816688537598, 'cur_lr': 4.999999873689376e-05, 'total_loss': 482.5158386230469, 'kl': 0.015651242807507515}, 'load_time_ms': 0.704, 'num_steps_sampled': 285600, 'update_time_ms': 2.529}",238,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.2395179271698,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,285600,285600,{},238,34,-95.66316184995254,2025-09-04_18-45-00,8.000068323775915,3651948,1757004300,-62.42745717113413,9128.5523583889,6406,38.06
+cda-server-2,False,9163.238487005234,"{'sample_time_ms': 33387.815, 'num_steps_trained': 286800, 'grad_time_ms': 374.688, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 439.43817138671875, 'policy_loss': -0.17008011043071747, 'vf_explained_var': 0.025046832859516144, 'entropy': 12.047761917114258, 'cur_lr': 4.999999873689376e-05, 'total_loss': 439.2913513183594, 'kl': 0.01528315432369709}, 'load_time_ms': 0.698, 'num_steps_sampled': 286800, 'update_time_ms': 2.507}",239,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.68612861633301,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,286800,286800,{},239,30,-95.66316184995254,2025-09-04_18-45-34,6.00007350824956,3651948,1757004334,-63.69194950752015,9163.238487005234,6436,38.83
+cda-server-2,False,9197.054631233215,"{'sample_time_ms': 33417.206, 'num_steps_trained': 288000, 'grad_time_ms': 373.11, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 479.13433837890625, 'policy_loss': -0.16308458149433136, 'vf_explained_var': 0.041000742465257645, 'entropy': 12.105916976928711, 'cur_lr': 4.999999873689376e-05, 'total_loss': 478.9933776855469, 'kl': 0.01457090862095356}, 'load_time_ms': 0.705, 'num_steps_sampled': 288000, 'update_time_ms': 2.543}",240,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.81614422798157,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,288000,288000,{},240,32,-91.67071305108782,2025-09-04_18-46-08,6.00007350824956,3651948,1757004368,-58.72263234325682,9197.054631233215,6468,36.91
+cda-server-2,False,9230.33184838295,"{'sample_time_ms': 33405.928, 'num_steps_trained': 289200, 'grad_time_ms': 371.198, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 472.53179931640625, 'policy_loss': -0.16461673378944397, 'vf_explained_var': 0.016277603805065155, 'entropy': 12.22976303100586, 'cur_lr': 4.999999873689376e-05, 'total_loss': 472.39068603515625, 'kl': 0.015477120876312256}, 'load_time_ms': 0.703, 'num_steps_sampled': 289200, 'update_time_ms': 2.505}",241,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.2772171497345,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,289200,289200,{},241,35,-92.50884099974769,2025-09-04_18-46-41,6.000044439385878,3651948,1757004401,-58.68430621255006,9230.33184838295,6503,36.9
+cda-server-2,False,9264.127333402634,"{'sample_time_ms': 33377.388, 'num_steps_trained': 290400, 'grad_time_ms': 371.17, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 457.06732177734375, 'policy_loss': -0.17669327557086945, 'vf_explained_var': 0.02635866403579712, 'entropy': 11.52662181854248, 'cur_lr': 4.999999873689376e-05, 'total_loss': 456.9132080078125, 'kl': 0.01483425684273243}, 'load_time_ms': 0.713, 'num_steps_sampled': 290400, 'update_time_ms': 2.495}",242,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.79548501968384,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,290400,290400,{},242,33,-94.35696984520187,2025-09-04_18-47-15,8.000000402653834,3651948,1757004435,-58.24223301701984,9264.127333402634,6536,36.42
+cda-server-2,False,9298.336977005005,"{'sample_time_ms': 33374.345, 'num_steps_trained': 291600, 'grad_time_ms': 372.454, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 497.88671875, 'policy_loss': -0.16106237471103668, 'vf_explained_var': 0.03792598471045494, 'entropy': 11.966264724731445, 'cur_lr': 4.999999873689376e-05, 'total_loss': 497.7486877441406, 'kl': 0.015170086175203323}, 'load_time_ms': 0.711, 'num_steps_sampled': 291600, 'update_time_ms': 2.501}",243,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.209643602371216,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,291600,291600,{},243,30,-94.35696984520187,2025-09-04_18-47-49,8.000000402653834,3651948,1757004469,-57.96694031513576,9298.336977005005,6566,36.01
+cda-server-2,False,9331.860694169998,"{'sample_time_ms': 33395.731, 'num_steps_trained': 292800, 'grad_time_ms': 373.965, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 564.9078979492188, 'policy_loss': -0.16683726012706757, 'vf_explained_var': 0.02323988452553749, 'entropy': 11.774674415588379, 'cur_lr': 4.999999873689376e-05, 'total_loss': 564.7649536132812, 'kl': 0.0157768651843071}, 'load_time_ms': 0.709, 'num_steps_sampled': 292800, 'update_time_ms': 2.473}",244,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.523717164993286,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,292800,292800,{},244,35,-94.35696984520187,2025-09-04_18-48-23,8.000000402653834,3651948,1757004503,-58.10606370958501,9331.860694169998,6601,36.2
+cda-server-2,False,9365.360492706299,"{'sample_time_ms': 33388.358, 'num_steps_trained': 294000, 'grad_time_ms': 372.607, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 525.4404907226562, 'policy_loss': -0.17092472314834595, 'vf_explained_var': 0.030833972617983818, 'entropy': 12.102313041687012, 'cur_lr': 4.999999873689376e-05, 'total_loss': 525.2924194335938, 'kl': 0.015038705430924892}, 'load_time_ms': 0.711, 'num_steps_sampled': 294000, 'update_time_ms': 2.535}",245,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.49979853630066,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,294000,294000,{},245,39,-92.67283625827994,2025-09-04_18-48-57,8.000000401008807,3651948,1757004537,-53.75262255852712,9365.360492706299,6640,34.39
+cda-server-2,False,9399.292599201202,"{'sample_time_ms': 33366.768, 'num_steps_trained': 295200, 'grad_time_ms': 372.608, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 503.8609619140625, 'policy_loss': -0.1675948053598404, 'vf_explained_var': 0.014279961585998535, 'entropy': 11.799020767211914, 'cur_lr': 4.999999873689376e-05, 'total_loss': 503.7156677246094, 'kl': 0.014678357169032097}, 'load_time_ms': 0.72, 'num_steps_sampled': 295200, 'update_time_ms': 2.56}",246,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.932106494903564,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,295200,295200,{},246,34,-91.12291953131009,2025-09-04_18-49-30,8.000000401008807,3651948,1757004570,-52.047525918583716,9399.292599201202,6674,33.62
+cda-server-2,False,9433.063627958298,"{'sample_time_ms': 33394.902, 'num_steps_trained': 296400, 'grad_time_ms': 372.305, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 482.9005126953125, 'policy_loss': -0.17791959643363953, 'vf_explained_var': 0.02709423191845417, 'entropy': 12.1153564453125, 'cur_lr': 4.999999873689376e-05, 'total_loss': 482.7449035644531, 'kl': 0.014708485454320908}, 'load_time_ms': 0.727, 'num_steps_sampled': 296400, 'update_time_ms': 2.534}",247,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.77102875709534,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,296400,296400,{},247,34,-91.4407548251745,2025-09-04_18-50-04,8.000000400008329,3651948,1757004604,-51.35171391690495,9433.063627958298,6708,33.29
+cda-server-2,False,9466.194394826889,"{'sample_time_ms': 33384.891, 'num_steps_trained': 297600, 'grad_time_ms': 371.499, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 510.83685302734375, 'policy_loss': -0.17384184896945953, 'vf_explained_var': 0.014701505191624165, 'entropy': 11.439382553100586, 'cur_lr': 4.999999873689376e-05, 'total_loss': 510.68658447265625, 'kl': 0.015532774850726128}, 'load_time_ms': 0.718, 'num_steps_sampled': 297600, 'update_time_ms': 2.512}",248,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.13076686859131,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,297600,297600,{},248,32,-91.44445958853663,2025-09-04_18-50-37,8.000000625473389,3651948,1757004637,-55.1829405642252,9466.194394826889,6740,34.99
+cda-server-2,False,9499.82013463974,"{'sample_time_ms': 33282.448, 'num_steps_trained': 298800, 'grad_time_ms': 367.948, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 498.81988525390625, 'policy_loss': -0.15983320772647858, 'vf_explained_var': 0.018894175067543983, 'entropy': 11.790884971618652, 'cur_lr': 4.999999873689376e-05, 'total_loss': 498.68408203125, 'kl': 0.015838027000427246}, 'load_time_ms': 0.72, 'num_steps_sampled': 298800, 'update_time_ms': 2.49}",249,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.62573981285095,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,298800,298800,{},249,34,-93.8386863279901,2025-09-04_18-51-11,8.000000625473389,3651948,1757004671,-57.87083837237296,9499.82013463974,6774,36.09
+cda-server-2,False,9533.402312994003,"{'sample_time_ms': 33261.054, 'num_steps_trained': 300000, 'grad_time_ms': 366.013, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 493.55804443359375, 'policy_loss': -0.16028568148612976, 'vf_explained_var': 0.02670077420771122, 'entropy': 11.616454124450684, 'cur_lr': 4.999999873689376e-05, 'total_loss': 493.42083740234375, 'kl': 0.015181425958871841}, 'load_time_ms': 0.696, 'num_steps_sampled': 300000, 'update_time_ms': 2.449}",250,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.582178354263306,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,300000,300000,{},250,36,-93.8386863279901,2025-09-04_18-51-45,8.000000625473389,3651948,1757004705,-57.23362154027995,9533.402312994003,6810,35.7
+cda-server-2,False,9566.642753839493,"{'sample_time_ms': 33255.008, 'num_steps_trained': 301200, 'grad_time_ms': 368.254, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 444.5202331542969, 'policy_loss': -0.16525143384933472, 'vf_explained_var': 0.032825905829668045, 'entropy': 11.39554214477539, 'cur_lr': 4.999999873689376e-05, 'total_loss': 444.3780822753906, 'kl': 0.015219918452203274}, 'load_time_ms': 0.699, 'num_steps_sampled': 301200, 'update_time_ms': 2.562}",251,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.2404408454895,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,301200,301200,{},251,33,-93.07010464848874,2025-09-04_18-52-18,8.00000040304245,3651948,1757004738,-55.47286553294055,9566.642753839493,6843,35.11
+cda-server-2,False,9600.813539981842,"{'sample_time_ms': 33293.166, 'num_steps_trained': 302400, 'grad_time_ms': 367.724, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 490.7688903808594, 'policy_loss': -0.181904137134552, 'vf_explained_var': 0.03228212893009186, 'entropy': 12.051400184631348, 'cur_lr': 4.999999873689376e-05, 'total_loss': 490.61029052734375, 'kl': 0.015358841978013515}, 'load_time_ms': 0.675, 'num_steps_sampled': 302400, 'update_time_ms': 2.552}",252,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.17078614234924,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,302400,302400,{},252,37,-93.1502064041992,2025-09-04_18-52-52,6.00001167450546,3651948,1757004772,-53.44209251089664,9600.813539981842,6880,34.15
+cda-server-2,False,9634.56543135643,"{'sample_time_ms': 33249.286, 'num_steps_trained': 303600, 'grad_time_ms': 365.85, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 438.65264892578125, 'policy_loss': -0.1741490662097931, 'vf_explained_var': 0.03822045028209686, 'entropy': 11.78211498260498, 'cur_lr': 4.999999873689376e-05, 'total_loss': 438.5022888183594, 'kl': 0.015649745240807533}, 'load_time_ms': 0.685, 'num_steps_sampled': 303600, 'update_time_ms': 2.515}",253,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.75189137458801,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,303600,303600,{},253,34,-93.1502064041992,2025-09-04_18-53-26,4.055704940266937,3651948,1757004806,-53.492828528782695,9634.56543135643,6914,34.28
+cda-server-2,False,9669.068894147873,"{'sample_time_ms': 33346.713, 'num_steps_trained': 304800, 'grad_time_ms': 366.41, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 502.4017028808594, 'policy_loss': -0.176845520734787, 'vf_explained_var': 0.03880747780203819, 'entropy': 11.777851104736328, 'cur_lr': 4.999999873689376e-05, 'total_loss': 502.2478332519531, 'kl': 0.015134657733142376}, 'load_time_ms': 0.69, 'num_steps_sampled': 304800, 'update_time_ms': 2.528}",254,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.50346279144287,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,304800,304800,{},254,36,-93.38752497946227,2025-09-04_18-54-00,5.312839912494095,3651948,1757004840,-52.2676855411649,9669.068894147873,6950,33.61
+cda-server-2,False,9702.425583600998,"{'sample_time_ms': 33331.79, 'num_steps_trained': 306000, 'grad_time_ms': 367.077, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 496.5535583496094, 'policy_loss': -0.16783879697322845, 'vf_explained_var': 0.017874909564852715, 'entropy': 11.913069725036621, 'cur_lr': 4.999999873689376e-05, 'total_loss': 496.4104309082031, 'kl': 0.016264840960502625}, 'load_time_ms': 0.682, 'num_steps_sampled': 306000, 'update_time_ms': 2.473}",255,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.356689453125,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,306000,306000,{},255,35,-93.38752497946227,2025-09-04_18-54-34,6.000102246417464,3651948,1757004874,-52.95659910840507,9702.425583600998,6985,33.75
+cda-server-2,False,9736.113502502441,"{'sample_time_ms': 33306.613, 'num_steps_trained': 307200, 'grad_time_ms': 367.862, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 501.1930847167969, 'policy_loss': -0.17009110748767853, 'vf_explained_var': 0.03265717998147011, 'entropy': 11.818390846252441, 'cur_lr': 4.999999873689376e-05, 'total_loss': 501.0460510253906, 'kl': 0.015193293802440166}, 'load_time_ms': 0.682, 'num_steps_sampled': 307200, 'update_time_ms': 2.441}",256,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.68791890144348,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,307200,307200,{},256,39,-92.67251074277206,2025-09-04_18-55-07,6.000109429998887,3651948,1757004907,-49.49791261277659,9736.113502502441,7024,32.13
+cda-server-2,False,9769.884490966797,"{'sample_time_ms': 33307.422, 'num_steps_trained': 308400, 'grad_time_ms': 367.01, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 498.14794921875, 'policy_loss': -0.16582104563713074, 'vf_explained_var': 0.024809036403894424, 'entropy': 11.592876434326172, 'cur_lr': 4.999999873689376e-05, 'total_loss': 498.0042419433594, 'kl': 0.0145410830155015}, 'load_time_ms': 0.668, 'num_steps_sampled': 308400, 'update_time_ms': 2.452}",257,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.77098846435547,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,308400,308400,{},257,43,-91.53659906537581,2025-09-04_18-55-41,6.000109429998887,3651948,1757004941,-43.73236682884013,9769.884490966797,7067,29.5
+cda-server-2,False,9804.377316713333,"{'sample_time_ms': 33442.048, 'num_steps_trained': 309600, 'grad_time_ms': 368.504, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 474.3194885253906, 'policy_loss': -0.17338663339614868, 'vf_explained_var': 0.027189724147319794, 'entropy': 11.489995956420898, 'cur_lr': 4.999999873689376e-05, 'total_loss': 474.17138671875, 'kl': 0.016618233174085617}, 'load_time_ms': 0.673, 'num_steps_sampled': 309600, 'update_time_ms': 2.499}",258,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.492825746536255,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,309600,309600,{},258,39,-91.35076520477672,2025-09-04_18-56-16,4.000352388379444,3651948,1757004976,-42.154622521712774,9804.377316713333,7106,28.72
+cda-server-2,False,9837.982171058655,"{'sample_time_ms': 33437.854, 'num_steps_trained': 310800, 'grad_time_ms': 370.569, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 441.2422790527344, 'policy_loss': -0.1783483773469925, 'vf_explained_var': 0.036072149872779846, 'entropy': 11.615463256835938, 'cur_lr': 4.999999873689376e-05, 'total_loss': 441.0879211425781, 'kl': 0.015805954113602638}, 'load_time_ms': 0.67, 'num_steps_sampled': 310800, 'update_time_ms': 2.501}",259,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.604854345321655,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,310800,310800,{},259,34,-91.35076520477672,2025-09-04_18-56-49,2.0005359728741396,3651948,1757005009,-48.61172280956397,9837.982171058655,7140,31.96
+cda-server-2,False,9871.78459239006,"{'sample_time_ms': 33457.814, 'num_steps_trained': 312000, 'grad_time_ms': 372.539, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 456.3844299316406, 'policy_loss': -0.18062280118465424, 'vf_explained_var': 0.0369785837829113, 'entropy': 11.31128978729248, 'cur_lr': 4.999999873689376e-05, 'total_loss': 456.2292175292969, 'kl': 0.016710573807358742}, 'load_time_ms': 0.672, 'num_steps_sampled': 312000, 'update_time_ms': 2.575}",260,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.80242133140564,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,312000,312000,{},260,36,-92.4291552854384,2025-09-04_18-57-23,6.000080980608969,3651948,1757005043,-52.84823083865218,9871.78459239006,7176,34.19
+cda-server-2,False,9905.303673744202,"{'sample_time_ms': 33486.184, 'num_steps_trained': 313200, 'grad_time_ms': 372.131, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 445.8668212890625, 'policy_loss': -0.17220252752304077, 'vf_explained_var': 0.027600638568401337, 'entropy': 11.534998893737793, 'cur_lr': 4.999999873689376e-05, 'total_loss': 445.7190856933594, 'kl': 0.016097839921712875}, 'load_time_ms': 0.668, 'num_steps_sampled': 313200, 'update_time_ms': 2.504}",261,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.519081354141235,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,313200,313200,{},261,32,-92.4291552854384,2025-09-04_18-57-57,6.000080980608969,3651948,1757005077,-55.41658854010331,9905.303673744202,7208,35.25
+cda-server-2,False,9939.227750062943,"{'sample_time_ms': 33460.782, 'num_steps_trained': 314400, 'grad_time_ms': 372.764, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 454.6693115234375, 'policy_loss': -0.16877640783786774, 'vf_explained_var': 0.017379429191350937, 'entropy': 11.471319198608398, 'cur_lr': 4.999999873689376e-05, 'total_loss': 454.52459716796875, 'kl': 0.015870148316025734}, 'load_time_ms': 0.678, 'num_steps_sampled': 314400, 'update_time_ms': 2.522}",262,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.924076318740845,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,314400,314400,{},262,33,-92.4291552854384,2025-09-04_18-58-31,6.000086958096147,3651948,1757005111,-56.424942778632506,9939.227750062943,7241,35.48
+cda-server-2,False,9973.117554426193,"{'sample_time_ms': 33472.645, 'num_steps_trained': 315600, 'grad_time_ms': 374.627, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 514.4802856445312, 'policy_loss': -0.16580967605113983, 'vf_explained_var': 0.030511697754263878, 'entropy': 11.81280517578125, 'cur_lr': 4.999999873689376e-05, 'total_loss': 514.3388061523438, 'kl': 0.015987424179911613}, 'load_time_ms': 0.672, 'num_steps_sampled': 315600, 'update_time_ms': 2.558}",263,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.88980436325073,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,315600,315600,{},263,36,-91.77151161971379,2025-09-04_18-59-05,6.000086958096147,3651948,1757005145,-55.9312512364152,9973.117554426193,7277,35.37
+cda-server-2,False,10007.813853263855,"{'sample_time_ms': 33491.385, 'num_steps_trained': 316800, 'grad_time_ms': 375.175, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 447.14453125, 'policy_loss': -0.1734510362148285, 'vf_explained_var': 0.02611129730939865, 'entropy': 10.773795127868652, 'cur_lr': 4.999999873689376e-05, 'total_loss': 446.99371337890625, 'kl': 0.014916815795004368}, 'load_time_ms': 0.664, 'num_steps_sampled': 316800, 'update_time_ms': 2.58}",264,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.69629883766174,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,316800,316800,{},264,42,-91.53913098515123,2025-09-04_18-59-39,6.000575166421358,3651948,1757005179,-49.16608473715471,10007.813853263855,7319,32.08
+cda-server-2,False,10040.971177101135,"{'sample_time_ms': 33473.134, 'num_steps_trained': 318000, 'grad_time_ms': 373.482, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 522.8036499023438, 'policy_loss': -0.16708451509475708, 'vf_explained_var': 0.020491890609264374, 'entropy': 11.706774711608887, 'cur_lr': 4.999999873689376e-05, 'total_loss': 522.6589965820312, 'kl': 0.014792154543101788}, 'load_time_ms': 0.664, 'num_steps_sampled': 318000, 'update_time_ms': 2.586}",265,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.15732383728027,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,318000,318000,{},265,37,-91.0761048981628,2025-09-04_19-00-13,6.000575166421358,3651948,1757005213,-47.178333058434504,10040.971177101135,7356,31.15
+cda-server-2,False,10074.561195135117,"{'sample_time_ms': 33464.165, 'num_steps_trained': 319200, 'grad_time_ms': 372.685, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 446.42901611328125, 'policy_loss': -0.16864469647407532, 'vf_explained_var': 0.020345423370599747, 'entropy': 11.407777786254883, 'cur_lr': 4.999999873689376e-05, 'total_loss': 446.2832946777344, 'kl': 0.015111408196389675}, 'load_time_ms': 0.663, 'num_steps_sampled': 319200, 'update_time_ms': 2.599}",266,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.59001803398132,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,319200,319200,{},266,35,-92.17183099341096,2025-09-04_19-00-46,8.000000403929597,3651948,1757005246,-50.03188182993689,10074.561195135117,7391,32.47
+cda-server-2,False,10108.989178180695,"{'sample_time_ms': 33529.985, 'num_steps_trained': 320400, 'grad_time_ms': 372.617, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 521.6529541015625, 'policy_loss': -0.17513184249401093, 'vf_explained_var': 0.023661097511649132, 'entropy': 11.008745193481445, 'cur_lr': 4.999999873689376e-05, 'total_loss': 521.501953125, 'kl': 0.015934422612190247}, 'load_time_ms': 0.675, 'num_steps_sampled': 320400, 'update_time_ms': 2.589}",267,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.427983045578,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,320400,320400,{},267,42,-92.17183099341096,2025-09-04_19-01-21,8.000003200551006,3651948,1757005281,-47.14156567128666,10108.989178180695,7433,31.2
+cda-server-2,False,10143.616182804108,"{'sample_time_ms': 33542.544, 'num_steps_trained': 321600, 'grad_time_ms': 373.486, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 496.2048034667969, 'policy_loss': -0.1643340140581131, 'vf_explained_var': 0.01232109498232603, 'entropy': 11.45879077911377, 'cur_lr': 4.999999873689376e-05, 'total_loss': 496.0647277832031, 'kl': 0.015981314703822136}, 'load_time_ms': 0.699, 'num_steps_sampled': 321600, 'update_time_ms': 2.566}",268,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.627004623413086,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,321600,321600,{},268,42,-93.49556735773535,2025-09-04_19-01-55,8.000003200551006,3651948,1757005315,-42.24551633872482,10143.616182804108,7475,28.59
+cda-server-2,False,10177.26104593277,"{'sample_time_ms': 33547.64, 'num_steps_trained': 322800, 'grad_time_ms': 372.376, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 473.0905456542969, 'policy_loss': -0.18091005086898804, 'vf_explained_var': 0.03324628621339798, 'entropy': 11.576276779174805, 'cur_lr': 4.999999873689376e-05, 'total_loss': 472.9339599609375, 'kl': 0.015997041016817093}, 'load_time_ms': 0.696, 'num_steps_sampled': 322800, 'update_time_ms': 2.573}",269,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.64486312866211,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,322800,322800,{},269,45,-93.49556735773535,2025-09-04_19-02-29,6.000151534633431,3651948,1757005349,-41.814555740237004,10177.26104593277,7520,28.3
+cda-server-2,False,10212.144480705261,"{'sample_time_ms': 33656.169, 'num_steps_trained': 324000, 'grad_time_ms': 371.946, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 484.0221862792969, 'policy_loss': -0.16421444714069366, 'vf_explained_var': 0.01682865619659424, 'entropy': 11.210699081420898, 'cur_lr': 4.999999873689376e-05, 'total_loss': 483.8819580078125, 'kl': 0.015759721398353577}, 'load_time_ms': 0.692, 'num_steps_sampled': 324000, 'update_time_ms': 2.53}",270,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.883434772491455,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,324000,324000,{},270,43,-93.49556735773535,2025-09-04_19-03-04,6.000151534633431,3651948,1757005384,-42.90693813406929,10212.144480705261,7563,28.76
+cda-server-2,False,10245.876401901245,"{'sample_time_ms': 33679.183, 'num_steps_trained': 325200, 'grad_time_ms': 370.133, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 479.9202880859375, 'policy_loss': -0.1779698133468628, 'vf_explained_var': 0.023097369819879532, 'entropy': 10.930511474609375, 'cur_lr': 4.999999873689376e-05, 'total_loss': 479.7669372558594, 'kl': 0.016181154176592827}, 'load_time_ms': 0.695, 'num_steps_sampled': 325200, 'update_time_ms': 2.613}",271,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.73192119598389,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,325200,325200,{},271,36,-93.68366487961454,2025-09-04_19-03-38,4.000525533646388,3651948,1757005418,-42.65263693139008,10245.876401901245,7599,28.76
+cda-server-2,False,10279.449191570282,"{'sample_time_ms': 33644.665, 'num_steps_trained': 326400, 'grad_time_ms': 369.57, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 484.3214111328125, 'policy_loss': -0.16890767216682434, 'vf_explained_var': 0.033569660037755966, 'entropy': 11.223655700683594, 'cur_lr': 4.999999873689376e-05, 'total_loss': 484.1768798828125, 'kl': 0.016036422923207283}, 'load_time_ms': 0.693, 'num_steps_sampled': 326400, 'update_time_ms': 2.591}",272,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.572789669036865,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,326400,326400,{},272,41,-93.68366487961454,2025-09-04_19-04-11,8.000000473594405,3651948,1757005451,-45.00275549680281,10279.449191570282,7640,30.04
+cda-server-2,False,10313.695538282394,"{'sample_time_ms': 33680.507, 'num_steps_trained': 327600, 'grad_time_ms': 369.417, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 427.5572509765625, 'policy_loss': -0.16799139976501465, 'vf_explained_var': 0.02995210886001587, 'entropy': 11.145977020263672, 'cur_lr': 4.999999873689376e-05, 'total_loss': 427.41265869140625, 'kl': 0.01543845422565937}, 'load_time_ms': 0.715, 'num_steps_sampled': 327600, 'update_time_ms': 2.579}",273,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.24634671211243,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,327600,327600,{},273,40,-93.68366487961454,2025-09-04_19-04-45,8.000000473594405,3651948,1757005485,-46.60512579057071,10313.695538282394,7680,30.86
+cda-server-2,False,10347.263674736023,"{'sample_time_ms': 33567.666, 'num_steps_trained': 328800, 'grad_time_ms': 369.409, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 502.9129333496094, 'policy_loss': -0.17614038288593292, 'vf_explained_var': 0.022216200828552246, 'entropy': 11.216800689697266, 'cur_lr': 4.999999873689376e-05, 'total_loss': 502.7601013183594, 'kl': 0.015363307669758797}, 'load_time_ms': 0.727, 'num_steps_sampled': 328800, 'update_time_ms': 2.56}",274,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.56813645362854,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,328800,328800,{},274,38,-91.98634031365813,2025-09-04_19-05-19,8.000225089274451,3651948,1757005519,-43.859947842870355,10347.263674736023,7718,29.61
+cda-server-2,False,10381.212057828903,"{'sample_time_ms': 33646.788, 'num_steps_trained': 330000, 'grad_time_ms': 369.366, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 453.77984619140625, 'policy_loss': -0.16606299579143524, 'vf_explained_var': 0.03338143602013588, 'entropy': 10.97883415222168, 'cur_lr': 4.999999873689376e-05, 'total_loss': 453.6378479003906, 'kl': 0.015875400975346565}, 'load_time_ms': 0.726, 'num_steps_sampled': 330000, 'update_time_ms': 2.615}",275,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.94838309288025,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,330000,330000,{},275,39,-91.98634031365813,2025-09-04_19-05-53,8.000225089274451,3651948,1757005553,-46.33955160056749,10381.212057828903,7757,30.69
+cda-server-2,False,10415.166213274002,"{'sample_time_ms': 33680.273, 'num_steps_trained': 331200, 'grad_time_ms': 372.268, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 450.1102294921875, 'policy_loss': -0.17988114058971405, 'vf_explained_var': 0.025934258475899696, 'entropy': 11.465625762939453, 'cur_lr': 4.999999873689376e-05, 'total_loss': 449.95477294921875, 'kl': 0.016104480251669884}, 'load_time_ms': 0.742, 'num_steps_sampled': 331200, 'update_time_ms': 2.587}",276,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.95415544509888,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,331200,331200,{},276,37,-90.53218214614839,2025-09-04_19-06-27,8.000225089274451,3651948,1757005587,-49.45591814832428,10415.166213274002,7794,32.37
+cda-server-2,False,10450.337350845337,"{'sample_time_ms': 33754.495, 'num_steps_trained': 332400, 'grad_time_ms': 372.366, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 469.13031005859375, 'policy_loss': -0.16875097155570984, 'vf_explained_var': 0.023109469562768936, 'entropy': 11.081283569335938, 'cur_lr': 4.999999873689376e-05, 'total_loss': 468.9852294921875, 'kl': 0.015559237450361252}, 'load_time_ms': 0.73, 'num_steps_sampled': 332400, 'update_time_ms': 2.592}",277,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",35.17113757133484,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,332400,332400,{},277,43,-90.53218214614839,2025-09-04_19-07-02,8.000071739314354,3651948,1757005622,-44.07394949871933,10450.337350845337,7837,29.69
+cda-server-2,False,10484.945219278336,"{'sample_time_ms': 33754.833, 'num_steps_trained': 333600, 'grad_time_ms': 370.167, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 543.11328125, 'policy_loss': -0.17992782592773438, 'vf_explained_var': 0.018156178295612335, 'entropy': 10.869085311889648, 'cur_lr': 4.999999873689376e-05, 'total_loss': 542.9584350585938, 'kl': 0.01653093658387661}, 'load_time_ms': 0.705, 'num_steps_sampled': 333600, 'update_time_ms': 2.579}",278,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.60786843299866,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,333600,333600,{},278,41,-91.15067117704314,2025-09-04_19-07-37,8.000071739314354,3651948,1757005657,-42.293963324320195,10484.945219278336,7878,28.67
+cda-server-2,False,10518.67206120491,"{'sample_time_ms': 33760.475, 'num_steps_trained': 334800, 'grad_time_ms': 372.736, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 473.5882263183594, 'policy_loss': -0.17750756442546844, 'vf_explained_var': 0.03091849945485592, 'entropy': 11.348112106323242, 'cur_lr': 4.999999873689376e-05, 'total_loss': 473.4354248046875, 'kl': 0.01630318909883499}, 'load_time_ms': 0.713, 'num_steps_sampled': 334800, 'update_time_ms': 2.583}",279,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.72684192657471,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,334800,334800,{},279,41,-91.15067117704314,2025-09-04_19-08-11,8.000004903249033,3651948,1757005691,-44.9513265938712,10518.67206120491,7919,29.95
+cda-server-2,False,10552.616226434708,"{'sample_time_ms': 33665.889, 'num_steps_trained': 336000, 'grad_time_ms': 373.425, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 491.8249816894531, 'policy_loss': -0.17976070940494537, 'vf_explained_var': 0.02973158471286297, 'entropy': 10.85261058807373, 'cur_lr': 4.999999873689376e-05, 'total_loss': 491.6720886230469, 'kl': 0.017699040472507477}, 'load_time_ms': 0.718, 'num_steps_sampled': 336000, 'update_time_ms': 2.594}",280,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.94416522979736,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,336000,336000,{},280,38,-95.3722150015734,2025-09-04_19-08-44,8.000004903249033,3651948,1757005724,-44.50705614322201,10552.616226434708,7957,29.95
+cda-server-2,False,10586.572783470154,"{'sample_time_ms': 33685.632, 'num_steps_trained': 337200, 'grad_time_ms': 376.163, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 488.8636169433594, 'policy_loss': -0.1731945276260376, 'vf_explained_var': 0.04734458401799202, 'entropy': 11.287887573242188, 'cur_lr': 4.999999873689376e-05, 'total_loss': 488.71331787109375, 'kl': 0.015083376318216324}, 'load_time_ms': 0.716, 'num_steps_sampled': 337200, 'update_time_ms': 2.562}",281,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.95655703544617,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,337200,337200,{},281,36,-95.3722150015734,2025-09-04_19-09-19,6.000347435424667,3651948,1757005759,-48.719626192050356,10586.572783470154,7993,32.19
+cda-server-2,False,10620.745208978653,"{'sample_time_ms': 33745.513, 'num_steps_trained': 338400, 'grad_time_ms': 376.235, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 491.0636901855469, 'policy_loss': -0.17348407208919525, 'vf_explained_var': 0.02385639399290085, 'entropy': 10.733929634094238, 'cur_lr': 4.999999873689376e-05, 'total_loss': 490.9172668457031, 'kl': 0.017822520807385445}, 'load_time_ms': 0.711, 'num_steps_sampled': 338400, 'update_time_ms': 2.588}",282,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.172425508499146,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,338400,338400,{},282,45,-91.86570982637153,2025-09-04_19-09-53,6.001391594613905,3651948,1757005793,-44.18720843152826,10620.745208978653,8038,29.89
+cda-server-2,False,10654.505257368088,"{'sample_time_ms': 33698.56, 'num_steps_trained': 339600, 'grad_time_ms': 374.566, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 443.69537353515625, 'policy_loss': -0.17414651811122894, 'vf_explained_var': 0.032790109515190125, 'entropy': 11.28775691986084, 'cur_lr': 4.999999873689376e-05, 'total_loss': 443.5450744628906, 'kl': 0.015703819692134857}, 'load_time_ms': 0.684, 'num_steps_sampled': 339600, 'update_time_ms': 2.592}",283,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.760048389434814,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,339600,339600,{},283,39,-91.8255611391432,2025-09-04_19-10-26,6.001391594613905,3651948,1757005826,-43.44958326804987,10654.505257368088,8077,29.28
+cda-server-2,False,10688.552230834961,"{'sample_time_ms': 33748.997, 'num_steps_trained': 340800, 'grad_time_ms': 372.023, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 443.31854248046875, 'policy_loss': -0.18360702693462372, 'vf_explained_var': 0.026069827377796173, 'entropy': 10.978754997253418, 'cur_lr': 4.999999873689376e-05, 'total_loss': 443.1598815917969, 'kl': 0.01643957756459713}, 'load_time_ms': 0.672, 'num_steps_sampled': 340800, 'update_time_ms': 2.593}",284,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.04697346687317,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,340800,340800,{},284,38,-91.8255611391432,2025-09-04_19-11-01,8.00000043651723,3651948,1757005861,-47.55785499302104,10688.552230834961,8115,31.36
+cda-server-2,False,10721.957757472992,"{'sample_time_ms': 33694.925, 'num_steps_trained': 342000, 'grad_time_ms': 371.863, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 499.5901184082031, 'policy_loss': -0.17610689997673035, 'vf_explained_var': 0.019171714782714844, 'entropy': 10.866064071655273, 'cur_lr': 4.999999873689376e-05, 'total_loss': 499.4377746582031, 'kl': 0.015659630298614502}, 'load_time_ms': 0.668, 'num_steps_sampled': 342000, 'update_time_ms': 2.537}",285,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.405526638031006,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,342000,342000,{},285,40,-91.22999134012633,2025-09-04_19-11-34,8.000052252170565,3651948,1757005894,-45.63354421876618,10721.957757472992,8155,30.29
+cda-server-2,False,10756.137785673141,"{'sample_time_ms': 33718.46, 'num_steps_trained': 343200, 'grad_time_ms': 370.937, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 465.80670166015625, 'policy_loss': -0.17610350251197815, 'vf_explained_var': 0.034203190356492996, 'entropy': 10.926987648010254, 'cur_lr': 4.999999873689376e-05, 'total_loss': 465.6551208496094, 'kl': 0.016123224049806595}, 'load_time_ms': 0.654, 'num_steps_sampled': 343200, 'update_time_ms': 2.537}",286,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.180028200149536,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,343200,343200,{},286,41,-91.22999134012633,2025-09-04_19-12-08,8.000052252170565,3651948,1757005928,-44.48674554696858,10756.137785673141,8196,29.72
+cda-server-2,False,10790.090457201004,"{'sample_time_ms': 33595.784, 'num_steps_trained': 344400, 'grad_time_ms': 371.739, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 509.7332763671875, 'policy_loss': -0.18097235262393951, 'vf_explained_var': 0.026486733928322792, 'entropy': 11.022765159606934, 'cur_lr': 4.999999873689376e-05, 'total_loss': 509.57720947265625, 'kl': 0.016459709033370018}, 'load_time_ms': 0.657, 'num_steps_sampled': 344400, 'update_time_ms': 2.535}",287,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.95267152786255,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,344400,344400,{},287,39,-90.82597945393007,2025-09-04_19-12-42,6.000163794206194,3651948,1757005962,-43.89554236020127,10790.090457201004,8235,29.7
+cda-server-2,False,10824.339283704758,"{'sample_time_ms': 33558.228, 'num_steps_trained': 345600, 'grad_time_ms': 373.358, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 479.1529846191406, 'policy_loss': -0.17926204204559326, 'vf_explained_var': 0.021100951358675957, 'entropy': 11.003413200378418, 'cur_lr': 4.999999873689376e-05, 'total_loss': 478.9981689453125, 'kl': 0.016063014045357704}, 'load_time_ms': 0.66, 'num_steps_sampled': 345600, 'update_time_ms': 2.549}",288,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.24882650375366,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,345600,345600,{},288,47,-90.82597945393007,2025-09-04_19-13-16,8.000000787655095,3651948,1757005996,-42.089675947863626,10824.339283704758,8282,28.73
+cda-server-2,False,10857.90751862526,"{'sample_time_ms': 33545.668, 'num_steps_trained': 346800, 'grad_time_ms': 370.082, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 489.45941162109375, 'policy_loss': -0.16726961731910706, 'vf_explained_var': 0.021498076617717743, 'entropy': 11.020486831665039, 'cur_lr': 4.999999873689376e-05, 'total_loss': 489.3161315917969, 'kl': 0.015811540186405182}, 'load_time_ms': 0.656, 'num_steps_sampled': 346800, 'update_time_ms': 2.557}",289,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.56823492050171,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,346800,346800,{},289,44,-92.1446320532424,2025-09-04_19-13-50,8.000000787655095,3651948,1757006030,-38.038023044437594,10857.90751862526,8326,26.54
+cda-server-2,False,10891.880641222,"{'sample_time_ms': 33547.73, 'num_steps_trained': 348000, 'grad_time_ms': 370.899, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 477.1534423828125, 'policy_loss': -0.18578127026557922, 'vf_explained_var': 0.020566217601299286, 'entropy': 11.158267974853516, 'cur_lr': 4.999999873689376e-05, 'total_loss': 476.99273681640625, 'kl': 0.016526976600289345}, 'load_time_ms': 0.674, 'num_steps_sampled': 348000, 'update_time_ms': 2.548}",290,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.97312259674072,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,348000,348000,{},290,40,-92.1446320532424,2025-09-04_19-14-24,6.00023374893414,3651948,1757006064,-43.65306031492569,10891.880641222,8366,29.34
+cda-server-2,False,10927.529315710068,"{'sample_time_ms': 33717.148, 'num_steps_trained': 349200, 'grad_time_ms': 370.706, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 486.17718505859375, 'policy_loss': -0.16736049950122833, 'vf_explained_var': 0.017367621883749962, 'entropy': 11.131400108337402, 'cur_lr': 4.999999873689376e-05, 'total_loss': 486.0347900390625, 'kl': 0.01641010493040085}, 'load_time_ms': 0.685, 'num_steps_sampled': 349200, 'update_time_ms': 2.533}",291,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",35.64867448806763,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,349200,349200,{},291,41,-92.21221438592339,2025-09-04_19-15-00,8.000000400008378,3651948,1757006100,-43.812856947642615,10927.529315710068,8407,29.49
+cda-server-2,False,10963.153591632843,"{'sample_time_ms': 33864.213, 'num_steps_trained': 350400, 'grad_time_ms': 368.848, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 459.4781799316406, 'policy_loss': -0.17313633859157562, 'vf_explained_var': 0.01630322076380253, 'entropy': 10.939611434936523, 'cur_lr': 4.999999873689376e-05, 'total_loss': 459.33050537109375, 'kl': 0.016762765124440193}, 'load_time_ms': 0.687, 'num_steps_sampled': 350400, 'update_time_ms': 2.517}",292,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",35.62427592277527,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,350400,350400,{},292,41,-92.72521688390759,2025-09-04_19-15-35,8.000000400008378,3651948,1757006135,-44.099940482126186,10963.153591632843,8448,29.8
+cda-server-2,False,10997.174255371094,"{'sample_time_ms': 33888.988, 'num_steps_trained': 351600, 'grad_time_ms': 370.137, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 427.449462890625, 'policy_loss': -0.1794745922088623, 'vf_explained_var': 0.028868675231933594, 'entropy': 11.113698959350586, 'cur_lr': 4.999999873689376e-05, 'total_loss': 427.29345703125, 'kl': 0.015439452603459358}, 'load_time_ms': 0.692, 'num_steps_sampled': 351600, 'update_time_ms': 2.493}",293,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.02066373825073,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,351600,351600,{},293,43,-92.72521688390759,2025-09-04_19-16-09,8.000000414224198,3651948,1757006169,-42.17512389247495,10997.174255371094,8491,29.01
+cda-server-2,False,11031.405236959457,"{'sample_time_ms': 33907.047, 'num_steps_trained': 352800, 'grad_time_ms': 370.504, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 510.30328369140625, 'policy_loss': -0.18082945048809052, 'vf_explained_var': 0.014124538749456406, 'entropy': 11.173457145690918, 'cur_lr': 4.999999873689376e-05, 'total_loss': 510.14752197265625, 'kl': 0.01647772826254368}, 'load_time_ms': 0.692, 'num_steps_sampled': 352800, 'update_time_ms': 2.501}",294,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.23098158836365,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,352800,352800,{},294,39,-92.72521688390759,2025-09-04_19-16-44,8.000000414224198,3651948,1757006204,-40.4530542152126,11031.405236959457,8530,27.98
+cda-server-2,False,11065.227889537811,"{'sample_time_ms': 33947.023, 'num_steps_trained': 354000, 'grad_time_ms': 372.241, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 533.9700927734375, 'policy_loss': -0.18131543695926666, 'vf_explained_var': 0.018143661320209503, 'entropy': 10.805140495300293, 'cur_lr': 4.999999873689376e-05, 'total_loss': 533.8145751953125, 'kl': 0.016999023035168648}, 'load_time_ms': 0.697, 'num_steps_sampled': 354000, 'update_time_ms': 2.498}",295,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.82265257835388,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,354000,354000,{},295,38,-92.81093397258293,2025-09-04_19-17-17,6.000249367189705,3651948,1757006237,-46.47372406120076,11065.227889537811,8568,30.75
+cda-server-2,False,11100.164932012558,"{'sample_time_ms': 34022.613, 'num_steps_trained': 355200, 'grad_time_ms': 372.335, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 457.92620849609375, 'policy_loss': -0.16722075641155243, 'vf_explained_var': 0.03289921581745148, 'entropy': 11.11696720123291, 'cur_lr': 4.999999873689376e-05, 'total_loss': 457.78167724609375, 'kl': 0.014939261600375175}, 'load_time_ms': 0.703, 'num_steps_sampled': 355200, 'update_time_ms': 2.498}",296,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.937042474746704,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,355200,355200,{},296,43,-92.81093397258293,2025-09-04_19-17-52,8.000185893064492,3651948,1757006272,-45.086113935137554,11100.164932012558,8611,29.87
+cda-server-2,False,11134.67031955719,"{'sample_time_ms': 34077.17, 'num_steps_trained': 356400, 'grad_time_ms': 373.02, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 457.5201721191406, 'policy_loss': -0.16585397720336914, 'vf_explained_var': 0.01322248950600624, 'entropy': 10.969801902770996, 'cur_lr': 4.999999873689376e-05, 'total_loss': 457.3786926269531, 'kl': 0.016034726053476334}, 'load_time_ms': 0.707, 'num_steps_sampled': 356400, 'update_time_ms': 2.507}",297,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.50538754463196,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,356400,356400,{},297,44,-91.00477448244305,2025-09-04_19-18-27,8.000185893064492,3651948,1757006307,-44.24866296136565,11134.67031955719,8655,29.73
+cda-server-2,False,11168.482450246811,"{'sample_time_ms': 34032.64, 'num_steps_trained': 357600, 'grad_time_ms': 373.878, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 438.6772155761719, 'policy_loss': -0.17128852009773254, 'vf_explained_var': 0.04709920659661293, 'entropy': 10.442963600158691, 'cur_lr': 4.999999873689376e-05, 'total_loss': 438.5312805175781, 'kl': 0.016678836196660995}, 'load_time_ms': 0.706, 'num_steps_sampled': 357600, 'update_time_ms': 2.507}",298,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.81213068962097,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,357600,357600,{},298,39,-91.00477448244305,2025-09-04_19-19-01,4.000119808434576,3651948,1757006341,-42.03931444679164,11168.482450246811,8694,28.7
+cda-server-2,False,11201.78668999672,"{'sample_time_ms': 34004.342, 'num_steps_trained': 358800, 'grad_time_ms': 375.751, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 447.9620361328125, 'policy_loss': -0.16031722724437714, 'vf_explained_var': 0.02535586804151535, 'entropy': 11.190040588378906, 'cur_lr': 4.999999873689376e-05, 'total_loss': 447.82476806640625, 'kl': 0.015202601440250874}, 'load_time_ms': 0.708, 'num_steps_sampled': 358800, 'update_time_ms': 2.519}",299,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.30423974990845,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,358800,358800,{},299,37,-90.47411887573381,2025-09-04_19-19-34,4.000031670263265,3651948,1757006374,-45.819786978871925,11201.78668999672,8731,30.58
+cda-server-2,False,11236.350129127502,"{'sample_time_ms': 34066.232, 'num_steps_trained': 360000, 'grad_time_ms': 372.985, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 470.8668212890625, 'policy_loss': -0.17791648209095, 'vf_explained_var': 0.025730881839990616, 'entropy': 10.827828407287598, 'cur_lr': 4.999999873689376e-05, 'total_loss': 470.7132568359375, 'kl': 0.01607631705701351}, 'load_time_ms': 0.684, 'num_steps_sampled': 360000, 'update_time_ms': 2.496}",300,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.56343913078308,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,360000,360000,{},300,42,-89.7114173671742,2025-09-04_19-20-09,4.000438841326207,3651948,1757006409,-43.891822344584035,11236.350129127502,8773,29.8
+cda-server-2,False,11270.30946135521,"{'sample_time_ms': 33899.468, 'num_steps_trained': 361200, 'grad_time_ms': 370.843, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 465.0353088378906, 'policy_loss': -0.17757363617420197, 'vf_explained_var': 0.025346828624606133, 'entropy': 10.962993621826172, 'cur_lr': 4.999999873689376e-05, 'total_loss': 464.88232421875, 'kl': 0.016150841489434242}, 'load_time_ms': 0.673, 'num_steps_sampled': 361200, 'update_time_ms': 2.498}",301,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.95933222770691,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,361200,361200,{},301,43,-90.85085832516624,2025-09-04_19-20-43,4.000807212266899,3651948,1757006443,-41.096598915944796,11270.30946135521,8816,28.4
+cda-server-2,False,11304.162751197815,"{'sample_time_ms': 33720.232, 'num_steps_trained': 362400, 'grad_time_ms': 372.928, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 440.8446960449219, 'policy_loss': -0.1764705777168274, 'vf_explained_var': 0.03883038088679314, 'entropy': 10.327861785888672, 'cur_lr': 4.999999873689376e-05, 'total_loss': 440.6927185058594, 'kl': 0.016116444021463394}, 'load_time_ms': 0.664, 'num_steps_sampled': 362400, 'update_time_ms': 2.526}",302,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.85328984260559,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,362400,362400,{},302,41,-90.85085832516624,2025-09-04_19-21-17,6.000016819112087,3651948,1757006477,-40.31461772771617,11304.162751197815,8857,27.8
+cda-server-2,False,11337.90143108368,"{'sample_time_ms': 33691.236, 'num_steps_trained': 363600, 'grad_time_ms': 373.751, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 488.3429870605469, 'policy_loss': -0.17381690442562103, 'vf_explained_var': 0.00864805094897747, 'entropy': 10.813539505004883, 'cur_lr': 4.999999873689376e-05, 'total_loss': 488.19244384765625, 'kl': 0.015300876460969448}, 'load_time_ms': 0.677, 'num_steps_sampled': 363600, 'update_time_ms': 2.546}",303,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.73867988586426,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,363600,363600,{},303,49,-90.38188123848992,2025-09-04_19-21-50,8.00014073366246,3651948,1757006510,-39.27054027924591,11337.90143108368,8906,27.24
+cda-server-2,False,11372.542692661285,"{'sample_time_ms': 33733.143, 'num_steps_trained': 364800, 'grad_time_ms': 372.899, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 452.9057312011719, 'policy_loss': -0.17092859745025635, 'vf_explained_var': 0.041277069598436356, 'entropy': 10.396652221679688, 'cur_lr': 4.999999873689376e-05, 'total_loss': 452.75872802734375, 'kl': 0.015768442302942276}, 'load_time_ms': 0.679, 'num_steps_sampled': 364800, 'update_time_ms': 2.516}",304,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.6412615776062,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,364800,364800,{},304,45,-90.7861691416485,2025-09-04_19-22-25,8.00014073366246,3651948,1757006545,-36.5787179220657,11372.542692661285,8951,25.85
+cda-server-2,False,11406.358407497406,"{'sample_time_ms': 33733.764, 'num_steps_trained': 366000, 'grad_time_ms': 371.583, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 446.9495849609375, 'policy_loss': -0.17110978066921234, 'vf_explained_var': 0.03779573738574982, 'entropy': 10.373178482055664, 'cur_lr': 4.999999873689376e-05, 'total_loss': 446.8055419921875, 'kl': 0.0178191140294075}, 'load_time_ms': 0.676, 'num_steps_sampled': 366000, 'update_time_ms': 2.511}",305,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.815714836120605,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,366000,366000,{},305,39,-92.60063372662192,2025-09-04_19-22-59,6.000007229369329,3651948,1757006579,-39.96323283805395,11406.358407497406,8990,27.7
+cda-server-2,False,11439.82025885582,"{'sample_time_ms': 33585.514, 'num_steps_trained': 367200, 'grad_time_ms': 372.256, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 436.24420166015625, 'policy_loss': -0.17119070887565613, 'vf_explained_var': 0.0166848823428154, 'entropy': 10.841540336608887, 'cur_lr': 4.999999873689376e-05, 'total_loss': 436.0961608886719, 'kl': 0.015268008224666119}, 'load_time_ms': 0.686, 'num_steps_sampled': 367200, 'update_time_ms': 2.526}",306,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.461851358413696,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,367200,367200,{},306,41,-92.60063372662192,2025-09-04_19-23-32,6.000004586562605,3651948,1757006612,-40.937305473336274,11439.82025885582,9031,28.35
+cda-server-2,False,11474.576438903809,"{'sample_time_ms': 33611.464, 'num_steps_trained': 368400, 'grad_time_ms': 371.401, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 468.3873291015625, 'policy_loss': -0.16533058881759644, 'vf_explained_var': 0.018551025539636612, 'entropy': 10.50613021850586, 'cur_lr': 4.999999873689376e-05, 'total_loss': 468.24755859375, 'kl': 0.016813894733786583}, 'load_time_ms': 0.684, 'num_steps_sampled': 368400, 'update_time_ms': 2.529}",307,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.75618004798889,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,368400,368400,{},307,48,-92.60063372662192,2025-09-04_19-24-07,8.000000400007286,3651948,1757006647,-41.111321091324605,11474.576438903809,9079,28.19
+cda-server-2,False,11509.679752349854,"{'sample_time_ms': 33741.183, 'num_steps_trained': 369600, 'grad_time_ms': 370.742, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 470.12060546875, 'policy_loss': -0.18637312948703766, 'vf_explained_var': 0.030650615692138672, 'entropy': 10.451064109802246, 'cur_lr': 4.999999873689376e-05, 'total_loss': 469.9599609375, 'kl': 0.016943683847784996}, 'load_time_ms': 0.695, 'num_steps_sampled': 369600, 'update_time_ms': 2.561}",308,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",35.10331344604492,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,369600,369600,{},308,51,-91.00169171281546,2025-09-04_19-24-42,8.000000400007286,3651948,1757006682,-34.03625109420629,11509.679752349854,9130,24.58
+cda-server-2,False,11543.63292002678,"{'sample_time_ms': 33807.24, 'num_steps_trained': 370800, 'grad_time_ms': 369.547, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 442.1883239746094, 'policy_loss': -0.1690937876701355, 'vf_explained_var': 0.02821219712495804, 'entropy': 10.33169174194336, 'cur_lr': 4.999999873689376e-05, 'total_loss': 442.0450744628906, 'kl': 0.017036719247698784}, 'load_time_ms': 0.696, 'num_steps_sampled': 370800, 'update_time_ms': 2.551}",309,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.95316767692566,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,370800,370800,{},309,47,-90.53602674662793,2025-09-04_19-25-16,8.000000798729044,3651948,1757006716,-34.02483593865472,11543.63292002678,9177,24.64
+cda-server-2,False,11579.952924489975,"{'sample_time_ms': 33982.89, 'num_steps_trained': 372000, 'grad_time_ms': 369.496, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 454.09228515625, 'policy_loss': -0.1684252768754959, 'vf_explained_var': 0.023490898311138153, 'entropy': 10.880743980407715, 'cur_lr': 4.999999873689376e-05, 'total_loss': 453.9481506347656, 'kl': 0.01598125509917736}, 'load_time_ms': 0.699, 'num_steps_sampled': 372000, 'update_time_ms': 2.593}",310,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",36.3200044631958,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,372000,372000,{},310,39,-90.92068676932722,2025-09-04_19-25-52,8.000000862998787,3651948,1757006752,-39.42615856737131,11579.952924489975,9216,27.46
+cda-server-2,False,11614.5580804348,"{'sample_time_ms': 34047.864, 'num_steps_trained': 373200, 'grad_time_ms': 369.15, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 457.7786560058594, 'policy_loss': -0.1775304675102234, 'vf_explained_var': 0.0441647432744503, 'entropy': 10.423691749572754, 'cur_lr': 4.999999873689376e-05, 'total_loss': 457.62640380859375, 'kl': 0.016661131754517555}, 'load_time_ms': 0.698, 'num_steps_sampled': 373200, 'update_time_ms': 2.534}",311,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.60515594482422,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,373200,373200,{},311,42,-90.92068676932722,2025-09-04_19-26-27,8.000000862998787,3651948,1757006787,-43.4336529885708,11614.5580804348,9258,29.6
+cda-server-2,False,11649.249782562256,"{'sample_time_ms': 34131.489, 'num_steps_trained': 374400, 'grad_time_ms': 369.354, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 432.60369873046875, 'policy_loss': -0.17828994989395142, 'vf_explained_var': 0.022118397057056427, 'entropy': 10.850200653076172, 'cur_lr': 4.999999873689376e-05, 'total_loss': 432.4488220214844, 'kl': 0.015431063249707222}, 'load_time_ms': 0.718, 'num_steps_sampled': 374400, 'update_time_ms': 2.521}",312,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.691702127456665,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,374400,374400,{},312,40,-90.92068676932722,2025-09-04_19-27-02,6.000735919372879,3651948,1757006822,-42.25150743222122,11649.249782562256,9298,29.17
+cda-server-2,False,11682.78334569931,"{'sample_time_ms': 34111.719, 'num_steps_trained': 375600, 'grad_time_ms': 368.598, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 459.1776123046875, 'policy_loss': -0.16833318769931793, 'vf_explained_var': 0.026400724425911903, 'entropy': 10.802581787109375, 'cur_lr': 4.999999873689376e-05, 'total_loss': 459.03466796875, 'kl': 0.016738150268793106}, 'load_time_ms': 0.704, 'num_steps_sampled': 375600, 'update_time_ms': 2.515}",313,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.53356313705444,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,375600,375600,{},313,42,-91.18932892772914,2025-09-04_19-27-35,6.000122607347135,3651948,1757006855,-41.76542683120597,11682.78334569931,9340,28.87
+cda-server-2,False,11716.662359952927,"{'sample_time_ms': 34033.137, 'num_steps_trained': 376800, 'grad_time_ms': 370.927, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 485.48138427734375, 'policy_loss': -0.17875385284423828, 'vf_explained_var': 0.027348611503839493, 'entropy': 10.68716812133789, 'cur_lr': 4.999999873689376e-05, 'total_loss': 485.32855224609375, 'kl': 0.01707782968878746}, 'load_time_ms': 0.704, 'num_steps_sampled': 376800, 'update_time_ms': 2.547}",314,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.87901425361633,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,376800,376800,{},314,42,-91.18932892772914,2025-09-04_19-28-09,6.000089715762789,3651948,1757006889,-40.81040170179933,11716.662359952927,9382,28.27
+cda-server-2,False,11751.556572198868,"{'sample_time_ms': 34139.255, 'num_steps_trained': 378000, 'grad_time_ms': 372.558, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 476.6417541503906, 'policy_loss': -0.17584042251110077, 'vf_explained_var': 0.024531476199626923, 'entropy': 10.380701065063477, 'cur_lr': 4.999999873689376e-05, 'total_loss': 476.48974609375, 'kl': 0.015712270513176918}, 'load_time_ms': 0.71, 'num_steps_sampled': 378000, 'update_time_ms': 2.589}",315,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.89421224594116,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,378000,378000,{},315,42,-91.48104105416093,2025-09-04_19-28-44,4.0007128072887825,3651948,1757006924,-41.17581426210382,11751.556572198868,9424,28.3
+cda-server-2,False,11785.116604804993,"{'sample_time_ms': 34150.172, 'num_steps_trained': 379200, 'grad_time_ms': 371.473, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 384.29254150390625, 'policy_loss': -0.1812402904033661, 'vf_explained_var': 0.0466405488550663, 'entropy': 10.41860294342041, 'cur_lr': 4.999999873689376e-05, 'total_loss': 384.13897705078125, 'kl': 0.018215632066130638}, 'load_time_ms': 0.7, 'num_steps_sampled': 379200, 'update_time_ms': 2.611}",316,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.56003260612488,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,379200,379200,{},316,51,-91.48104105416093,2025-09-04_19-29-18,8.000002855581583,3651948,1757006958,-37.85689061438333,11785.116604804993,9475,26.68
+cda-server-2,False,11819.322497367859,"{'sample_time_ms': 34095.459, 'num_steps_trained': 380400, 'grad_time_ms': 371.202, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 415.83062744140625, 'policy_loss': -0.17209358513355255, 'vf_explained_var': 0.027635348960757256, 'entropy': 10.5113525390625, 'cur_lr': 4.999999873689376e-05, 'total_loss': 415.6828918457031, 'kl': 0.016044579446315765}, 'load_time_ms': 0.696, 'num_steps_sampled': 380400, 'update_time_ms': 2.583}",317,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.20589256286621,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,380400,380400,{},317,51,-88.86592084271396,2025-09-04_19-29-52,8.00008632033351,3651948,1757006992,-31.692435923445323,11819.322497367859,9526,23.74
+cda-server-2,False,11853.869593143463,"{'sample_time_ms': 34041.677, 'num_steps_trained': 381600, 'grad_time_ms': 369.43, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 425.4674072265625, 'policy_loss': -0.1668584793806076, 'vf_explained_var': 0.018289612606167793, 'entropy': 10.087419509887695, 'cur_lr': 4.999999873689376e-05, 'total_loss': 425.3260192871094, 'kl': 0.016720084473490715}, 'load_time_ms': 0.678, 'num_steps_sampled': 381600, 'update_time_ms': 2.542}",318,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.54709577560425,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,381600,381600,{},318,53,-88.86592084271396,2025-09-04_19-30-27,8.000000403908986,3651948,1757007027,-29.71029033445762,11853.869593143463,9579,22.75
+cda-server-2,False,11887.57539987564,"{'sample_time_ms': 34015.048, 'num_steps_trained': 382800, 'grad_time_ms': 371.326, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 417.201171875, 'policy_loss': -0.17120924592018127, 'vf_explained_var': 0.034845318645238876, 'entropy': 10.042140007019043, 'cur_lr': 4.999999873689376e-05, 'total_loss': 417.05718994140625, 'kl': 0.017931900918483734}, 'load_time_ms': 0.68, 'num_steps_sampled': 382800, 'update_time_ms': 2.521}",319,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.705806732177734,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,382800,382800,{},319,40,-90.3618584300389,2025-09-04_19-31-00,8.000088781906566,3651948,1757007060,-35.939346751680006,11887.57539987564,9619,25.8
+cda-server-2,False,11921.361751317978,"{'sample_time_ms': 33761.574, 'num_steps_trained': 384000, 'grad_time_ms': 371.469, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 457.16656494140625, 'policy_loss': -0.18198245763778687, 'vf_explained_var': 0.013436595909297466, 'entropy': 10.356392860412598, 'cur_lr': 4.999999873689376e-05, 'total_loss': 457.0089416503906, 'kl': 0.01606649160385132}, 'load_time_ms': 0.681, 'num_steps_sampled': 384000, 'update_time_ms': 2.496}",320,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.786351442337036,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,384000,384000,{},320,48,-90.3618584300389,2025-09-04_19-31-34,10.0,3651948,1757007094,-37.11216717509862,11921.361751317978,9667,26.4
+cda-server-2,False,11956.108426094055,"{'sample_time_ms': 33773.094, 'num_steps_trained': 385200, 'grad_time_ms': 374.036, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 386.59881591796875, 'policy_loss': -0.16370391845703125, 'vf_explained_var': 0.02832198143005371, 'entropy': 9.846466064453125, 'cur_lr': 4.999999873689376e-05, 'total_loss': 386.4592590332031, 'kl': 0.015905356034636497}, 'load_time_ms': 0.692, 'num_steps_sampled': 385200, 'update_time_ms': 2.5}",321,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.74667477607727,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,385200,385200,{},321,59,-88.47117760721373,2025-09-04_19-32-09,10.0,3651948,1757007129,-28.40865093339731,11956.108426094055,9726,22.03
+cda-server-2,False,11989.999118328094,"{'sample_time_ms': 33693.055, 'num_steps_trained': 386400, 'grad_time_ms': 373.996, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 372.04437255859375, 'policy_loss': -0.16547948122024536, 'vf_explained_var': 0.043456368148326874, 'entropy': 10.279644012451172, 'cur_lr': 4.999999873689376e-05, 'total_loss': 371.90472412109375, 'kl': 0.016999872401356697}, 'load_time_ms': 0.679, 'num_steps_sampled': 386400, 'update_time_ms': 2.53}",322,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.89069223403931,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,386400,386400,{},322,40,-89.53242538543225,2025-09-04_19-32-43,8.000425996567381,3651948,1757007163,-32.815327842877174,11989.999118328094,9766,24.51
+cda-server-2,False,12024.175191640854,"{'sample_time_ms': 33757.041, 'num_steps_trained': 387600, 'grad_time_ms': 374.23, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 489.5959167480469, 'policy_loss': -0.1738227903842926, 'vf_explained_var': 0.023348549380898476, 'entropy': 10.526703834533691, 'cur_lr': 4.999999873689376e-05, 'total_loss': 489.4485778808594, 'kl': 0.01741768978536129}, 'load_time_ms': 0.697, 'num_steps_sampled': 387600, 'update_time_ms': 2.52}",323,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.1760733127594,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,387600,387600,{},323,52,-89.53242538543225,2025-09-04_19-33-17,8.001431129957744,3651948,1757007197,-34.47625280541841,12024.175191640854,9818,25.18
+cda-server-2,False,12059.069237470627,"{'sample_time_ms': 33860.581, 'num_steps_trained': 388800, 'grad_time_ms': 372.195, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 440.6160888671875, 'policy_loss': -0.18319621682167053, 'vf_explained_var': 0.03996426612138748, 'entropy': 10.28986930847168, 'cur_lr': 4.999999873689376e-05, 'total_loss': 440.45819091796875, 'kl': 0.01664073020219803}, 'load_time_ms': 0.699, 'num_steps_sampled': 388800, 'update_time_ms': 2.511}",324,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.89404582977295,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,388800,388800,{},324,44,-89.8807812709613,2025-09-04_19-33-52,8.001431129957744,3651948,1757007232,-34.75706033933923,12059.069237470627,9862,25.01
+cda-server-2,False,12093.288192510605,"{'sample_time_ms': 33793.196, 'num_steps_trained': 390000, 'grad_time_ms': 372.144, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 507.6988525390625, 'policy_loss': -0.17963431775569916, 'vf_explained_var': 0.016723016276955605, 'entropy': 10.052237510681152, 'cur_lr': 4.999999873689376e-05, 'total_loss': 507.5438232421875, 'kl': 0.01622912287712097}, 'load_time_ms': 0.702, 'num_steps_sampled': 390000, 'update_time_ms': 2.472}",325,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.21895503997803,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,390000,390000,{},325,42,-89.8807812709613,2025-09-04_19-34-26,8.001431129957744,3651948,1757007266,-36.93958016580334,12093.288192510605,9904,26.11
+cda-server-2,False,12127.234577655792,"{'sample_time_ms': 33833.444, 'num_steps_trained': 391200, 'grad_time_ms': 370.617, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 478.32574462890625, 'policy_loss': -0.16879968345165253, 'vf_explained_var': 0.04178478196263313, 'entropy': 9.682168960571289, 'cur_lr': 4.999999873689376e-05, 'total_loss': 478.1843566894531, 'kl': 0.018091507256031036}, 'load_time_ms': 0.689, 'num_steps_sampled': 391200, 'update_time_ms': 2.442}",326,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.94638514518738,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,391200,391200,{},326,48,-90.19253982500018,2025-09-04_19-35-00,6.000000800106407,3651948,1757007300,-37.00160192012961,12127.234577655792,9952,26.28
+cda-server-2,False,12161.142486095428,"{'sample_time_ms': 33802.232, 'num_steps_trained': 392400, 'grad_time_ms': 371.967, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 454.6077880859375, 'policy_loss': -0.18222576379776, 'vf_explained_var': 0.03381510451436043, 'entropy': 10.573821067810059, 'cur_lr': 4.999999873689376e-05, 'total_loss': 454.4508056640625, 'kl': 0.0166572667658329}, 'load_time_ms': 0.696, 'num_steps_sampled': 392400, 'update_time_ms': 2.491}",327,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.90790843963623,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,392400,392400,{},327,43,-91.57674154747771,2025-09-04_19-35-34,6.00000411929137,3651948,1757007334,-39.444600090042655,12161.142486095428,9995,27.34
+cda-server-2,False,12194.925875902176,"{'sample_time_ms': 33723.657, 'num_steps_trained': 393600, 'grad_time_ms': 374.075, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 458.1942138671875, 'policy_loss': -0.1842077076435089, 'vf_explained_var': 0.03987787663936615, 'entropy': 10.133516311645508, 'cur_lr': 4.999999873689376e-05, 'total_loss': 458.0357971191406, 'kl': 0.016979089006781578}, 'load_time_ms': 0.698, 'num_steps_sampled': 393600, 'update_time_ms': 2.536}",328,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.78338980674744,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,393600,393600,{},328,52,-91.57674154747771,2025-09-04_19-36-08,8.000056522565014,3651948,1757007368,-36.65842037138543,12194.925875902176,10047,25.97
+cda-server-2,False,12228.733886957169,"{'sample_time_ms': 33736.663, 'num_steps_trained': 394800, 'grad_time_ms': 371.248, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 393.3811950683594, 'policy_loss': -0.17357520759105682, 'vf_explained_var': 0.0357980877161026, 'entropy': 10.196868896484375, 'cur_lr': 4.999999873689376e-05, 'total_loss': 393.2325744628906, 'kl': 0.016411934047937393}, 'load_time_ms': 0.694, 'num_steps_sampled': 394800, 'update_time_ms': 2.613}",329,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.808011054992676,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,394800,394800,{},329,51,-91.04302995749633,2025-09-04_19-36-42,8.000056522565014,3651948,1757007402,-30.52933196730402,12228.733886957169,10098,22.97
+cda-server-2,False,12262.225366592407,"{'sample_time_ms': 33705.83, 'num_steps_trained': 396000, 'grad_time_ms': 372.564, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 460.1976013183594, 'policy_loss': -0.16239674389362335, 'vf_explained_var': 0.028911564499139786, 'entropy': 9.807985305786133, 'cur_lr': 4.999999873689376e-05, 'total_loss': 460.06298828125, 'kl': 0.018334100022912025}, 'load_time_ms': 0.697, 'num_steps_sampled': 396000, 'update_time_ms': 2.616}",330,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.49147963523865,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,396000,396000,{},330,54,-91.04302995749633,2025-09-04_19-37-15,8.000016476484692,3651948,1757007435,-29.211292415069355,12262.225366592407,10152,22.39
+cda-server-2,False,12295.86148929596,"{'sample_time_ms': 33596.572, 'num_steps_trained': 397200, 'grad_time_ms': 370.79, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 444.2686462402344, 'policy_loss': -0.18200664222240448, 'vf_explained_var': 0.02418290264904499, 'entropy': 10.006134033203125, 'cur_lr': 4.999999873689376e-05, 'total_loss': 444.1136474609375, 'kl': 0.017796959728002548}, 'load_time_ms': 0.69, 'num_steps_sampled': 397200, 'update_time_ms': 2.642}",331,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.636122703552246,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,397200,397200,{},331,49,-92.5131862655504,2025-09-04_19-37-49,8.000016476484692,3651948,1757007469,-31.506307522804214,12295.86148929596,10201,23.42
+cda-server-2,False,12329.590245008469,"{'sample_time_ms': 33579.446, 'num_steps_trained': 398400, 'grad_time_ms': 371.749, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 420.9778747558594, 'policy_loss': -0.1725282073020935, 'vf_explained_var': 0.03219058737158775, 'entropy': 10.413145065307617, 'cur_lr': 4.999999873689376e-05, 'total_loss': 420.8314208984375, 'kl': 0.017172694206237793}, 'load_time_ms': 0.686, 'num_steps_sampled': 398400, 'update_time_ms': 2.614}",332,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.728755712509155,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,398400,398400,{},332,52,-92.5131862655504,2025-09-04_19-38-23,8.00000079838559,3651948,1757007503,-31.67599185907574,12329.590245008469,10253,23.77
+cda-server-2,False,12363.33668923378,"{'sample_time_ms': 33537.076, 'num_steps_trained': 399600, 'grad_time_ms': 371.145, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 411.8221130371094, 'policy_loss': -0.17770729959011078, 'vf_explained_var': 0.023092150688171387, 'entropy': 9.869694709777832, 'cur_lr': 4.999999873689376e-05, 'total_loss': 411.67071533203125, 'kl': 0.017328284680843353}, 'load_time_ms': 0.68, 'num_steps_sampled': 399600, 'update_time_ms': 2.624}",333,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.74644422531128,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,399600,399600,{},333,48,-90.46378407689798,2025-09-04_19-38-56,8.000012068655842,3651948,1757007536,-31.981388016325546,12363.33668923378,10301,23.92
+cda-server-2,False,12396.86396741867,"{'sample_time_ms': 33400.582, 'num_steps_trained': 400800, 'grad_time_ms': 370.955, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 413.9346923828125, 'policy_loss': -0.17096306383609772, 'vf_explained_var': 0.029395541176199913, 'entropy': 10.063179016113281, 'cur_lr': 4.999999873689376e-05, 'total_loss': 413.7904968261719, 'kl': 0.017636030912399292}, 'load_time_ms': 0.679, 'num_steps_sampled': 400800, 'update_time_ms': 2.625}",334,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.52727818489075,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,400800,400800,{},334,53,-90.15161401563806,2025-09-04_19-39-30,8.000012068655842,3651948,1757007570,-31.55424944512829,12396.86396741867,10354,23.56
+cda-server-2,False,12431.238450527191,"{'sample_time_ms': 33416.36, 'num_steps_trained': 402000, 'grad_time_ms': 370.713, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 399.13482666015625, 'policy_loss': -0.17379695177078247, 'vf_explained_var': 0.027004705742001534, 'entropy': 9.99160099029541, 'cur_lr': 4.999999873689376e-05, 'total_loss': 398.9884033203125, 'kl': 0.018025698140263557}, 'load_time_ms': 0.67, 'num_steps_sampled': 402000, 'update_time_ms': 2.643}",335,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.37448310852051,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,402000,402000,{},335,47,-89.7706388726001,2025-09-04_19-40-04,8.000003450078843,3651948,1757007604,-32.67628960781086,12431.238450527191,10401,24.22
+cda-server-2,False,12466.360349416733,"{'sample_time_ms': 33534.046, 'num_steps_trained': 403200, 'grad_time_ms': 370.576, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 462.3807067871094, 'policy_loss': -0.17687784135341644, 'vf_explained_var': 0.03196879103779793, 'entropy': 9.953373908996582, 'cur_lr': 4.999999873689376e-05, 'total_loss': 462.2281494140625, 'kl': 0.016013547778129578}, 'load_time_ms': 0.667, 'num_steps_sampled': 403200, 'update_time_ms': 2.637}",336,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",35.121898889541626,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,403200,403200,{},336,48,-90.71343893449779,2025-09-04_19-40-39,8.000039580994097,3651948,1757007639,-34.02233865251927,12466.360349416733,10449,24.92
+cda-server-2,False,12500.164155006409,"{'sample_time_ms': 33524.415, 'num_steps_trained': 404400, 'grad_time_ms': 369.817, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 405.3450927734375, 'policy_loss': -0.15513576567173004, 'vf_explained_var': 0.04755732789635658, 'entropy': 9.821985244750977, 'cur_lr': 4.999999873689376e-05, 'total_loss': 405.2188720703125, 'kl': 0.019034268334507942}, 'load_time_ms': 0.674, 'num_steps_sampled': 404400, 'update_time_ms': 2.591}",337,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.8038055896759,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,404400,404400,{},337,53,-90.71343893449779,2025-09-04_19-41-13,8.000175876177645,3651948,1757007673,-30.839615923559972,12500.164155006409,10502,23.29
+cda-server-2,False,12534.323317050934,"{'sample_time_ms': 33560.971, 'num_steps_trained': 405600, 'grad_time_ms': 370.799, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 423.502197265625, 'policy_loss': -0.1640964299440384, 'vf_explained_var': 0.02169586531817913, 'entropy': 10.113418579101562, 'cur_lr': 4.999999873689376e-05, 'total_loss': 423.36187744140625, 'kl': 0.015655517578125}, 'load_time_ms': 0.694, 'num_steps_sampled': 405600, 'update_time_ms': 2.569}",338,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.15916204452515,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,405600,405600,{},338,59,-88.64704893157999,2025-09-04_19-41-47,8.000175876177645,3651948,1757007707,-29.41639047896397,12534.323317050934,10561,22.53
+cda-server-2,False,12569.255161523819,"{'sample_time_ms': 33671.241, 'num_steps_trained': 406800, 'grad_time_ms': 372.866, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 383.4759216308594, 'policy_loss': -0.17542091012001038, 'vf_explained_var': 0.035720545798540115, 'entropy': 9.911641120910645, 'cur_lr': 4.999999873689376e-05, 'total_loss': 383.3271484375, 'kl': 0.0175609327852726}, 'load_time_ms': 0.699, 'num_steps_sampled': 406800, 'update_time_ms': 2.558}",339,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.93184447288513,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,406800,406800,{},339,54,-88.95639733666384,2025-09-04_19-42-22,8.000058437428255,3651948,1757007742,-27.918244561996463,12569.255161523819,10615,21.57
+cda-server-2,False,12603.29483294487,"{'sample_time_ms': 33726.576, 'num_steps_trained': 408000, 'grad_time_ms': 372.281, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 403.29644775390625, 'policy_loss': -0.1811588853597641, 'vf_explained_var': 0.0359710268676281, 'entropy': 10.170073509216309, 'cur_lr': 4.999999873689376e-05, 'total_loss': 403.1409912109375, 'kl': 0.016941126435995102}, 'load_time_ms': 0.707, 'num_steps_sampled': 408000, 'update_time_ms': 2.58}",340,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.039671421051025,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,408000,408000,{},340,57,-88.95639733666384,2025-09-04_19-42-56,8.000000400035175,3651948,1757007776,-29.118594932413465,12603.29483294487,10672,22.42
+cda-server-2,False,12637.108101844788,"{'sample_time_ms': 33743.381, 'num_steps_trained': 409200, 'grad_time_ms': 373.145, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 424.63726806640625, 'policy_loss': -0.16347110271453857, 'vf_explained_var': 0.01010894775390625, 'entropy': 9.663955688476562, 'cur_lr': 4.999999873689376e-05, 'total_loss': 424.49853515625, 'kl': 0.01630197837948799}, 'load_time_ms': 0.705, 'num_steps_sampled': 409200, 'update_time_ms': 2.578}",341,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.8132688999176,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,409200,409200,{},341,53,-90.20769829520017,2025-09-04_19-43-30,6.00013661095259,3651948,1757007810,-27.016400122087536,12637.108101844788,10725,21.22
+cda-server-2,False,12671.506431818008,"{'sample_time_ms': 33811.34, 'num_steps_trained': 410400, 'grad_time_ms': 372.123, 'default': {'cur_kl_coeff': 1.5187499523162842, 'vf_loss': 449.7720642089844, 'policy_loss': -0.16926951706409454, 'vf_explained_var': 0.023888012394309044, 'entropy': 10.053503036499023, 'cur_lr': 4.999999873689376e-05, 'total_loss': 449.6346435546875, 'kl': 0.020978741347789764}, 'load_time_ms': 0.703, 'num_steps_sampled': 410400, 'update_time_ms': 2.601}",342,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.398329973220825,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,410400,410400,{},342,53,-91.16869126360704,2025-09-04_19-44-05,6.000537418055949,3651948,1757007845,-30.113403214695477,12671.506431818008,10778,22.61
+cda-server-2,False,12705.677307367325,"{'sample_time_ms': 33852.925, 'num_steps_trained': 411600, 'grad_time_ms': 373.041, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 439.0072021484375, 'policy_loss': -0.16299201548099518, 'vf_explained_var': 0.03032485581934452, 'entropy': 9.606775283813477, 'cur_lr': 4.999999873689376e-05, 'total_loss': 438.87518310546875, 'kl': 0.013596983626484871}, 'load_time_ms': 0.693, 'num_steps_sampled': 411600, 'update_time_ms': 2.607}",343,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.170875549316406,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,411600,411600,{},343,48,-91.6940993715843,2025-09-04_19-44-39,6.000537418055949,3651948,1757007879,-32.992714094059394,12705.677307367325,10826,24.04
+cda-server-2,False,12739.35609960556,"{'sample_time_ms': 33866.384, 'num_steps_trained': 412800, 'grad_time_ms': 374.732, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 446.5347595214844, 'policy_loss': -0.1601501703262329, 'vf_explained_var': 0.03740853816270828, 'entropy': 9.86202621459961, 'cur_lr': 4.999999873689376e-05, 'total_loss': 446.40704345703125, 'kl': 0.014236312359571457}, 'load_time_ms': 0.691, 'num_steps_sampled': 412800, 'update_time_ms': 2.584}",344,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.678792238235474,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,412800,412800,{},344,49,-91.6940993715843,2025-09-04_19-45-13,8.000000558693417,3651948,1757007913,-33.83636061839598,12739.35609960556,10875,24.63
+cda-server-2,False,12774.302807807922,"{'sample_time_ms': 33925.112, 'num_steps_trained': 414000, 'grad_time_ms': 373.227, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 488.3459167480469, 'policy_loss': -0.15767335891723633, 'vf_explained_var': 0.021632233634591103, 'entropy': 9.496479988098145, 'cur_lr': 4.999999873689376e-05, 'total_loss': 488.2213439941406, 'kl': 0.014542263001203537}, 'load_time_ms': 0.69, 'num_steps_sampled': 414000, 'update_time_ms': 2.588}",345,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.94670820236206,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,414000,414000,{},345,45,-93.34928755288458,2025-09-04_19-45-48,8.000000558693417,3651948,1757007948,-34.39253631504618,12774.302807807922,10920,24.92
+cda-server-2,False,12808.581895112991,"{'sample_time_ms': 33838.976, 'num_steps_trained': 415200, 'grad_time_ms': 375.046, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 492.27984619140625, 'policy_loss': -0.1693073809146881, 'vf_explained_var': 0.02376851812005043, 'entropy': 9.99674129486084, 'cur_lr': 4.999999873689376e-05, 'total_loss': 492.14208984375, 'kl': 0.013864864595234394}, 'load_time_ms': 0.694, 'num_steps_sampled': 415200, 'update_time_ms': 2.58}",346,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.27908730506897,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,415200,415200,{},346,56,-93.34928755288458,2025-09-04_19-46-22,8.000000400000136,3651948,1757007982,-31.76091932012032,12808.581895112991,10976,23.41
+cda-server-2,False,12842.978868246078,"{'sample_time_ms': 33900.113, 'num_steps_trained': 416400, 'grad_time_ms': 373.212, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 410.708740234375, 'policy_loss': -0.16634632647037506, 'vf_explained_var': 0.02237485535442829, 'entropy': 9.893744468688965, 'cur_lr': 4.999999873689376e-05, 'total_loss': 410.57379150390625, 'kl': 0.013760336674749851}, 'load_time_ms': 0.683, 'num_steps_sampled': 416400, 'update_time_ms': 2.613}",347,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.39697313308716,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,416400,416400,{},347,52,-91.73648583108125,2025-09-04_19-46-56,8.000025663690788,3651948,1757008016,-27.793360037345355,12842.978868246078,11028,21.75
+cda-server-2,False,12876.585081338882,"{'sample_time_ms': 33845.898, 'num_steps_trained': 417600, 'grad_time_ms': 372.207, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 476.0789794921875, 'policy_loss': -0.17810894548892975, 'vf_explained_var': 0.021973775699734688, 'entropy': 10.166853904724121, 'cur_lr': 4.999999873689376e-05, 'total_loss': 475.93109130859375, 'kl': 0.013259019702672958}, 'load_time_ms': 0.672, 'num_steps_sampled': 417600, 'update_time_ms': 2.604}",348,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.606213092803955,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,417600,417600,{},348,47,-88.64009437858603,2025-09-04_19-47-30,8.000025663690788,3651948,1757008050,-32.03545341435674,12876.585081338882,11075,24.11
+cda-server-2,False,12911.08446264267,"{'sample_time_ms': 33803.342, 'num_steps_trained': 418800, 'grad_time_ms': 371.594, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 453.6725769042969, 'policy_loss': -0.16002562642097473, 'vf_explained_var': 0.01317038107663393, 'entropy': 9.815576553344727, 'cur_lr': 4.999999873689376e-05, 'total_loss': 453.5440673828125, 'kl': 0.013857332058250904}, 'load_time_ms': 0.669, 'num_steps_sampled': 418800, 'update_time_ms': 2.589}",349,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.49938130378723,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,418800,418800,{},349,55,-90.00761375590767,2025-09-04_19-48-04,8.000005925901757,3651948,1757008084,-30.707022201649437,12911.08446264267,11130,23.22
+cda-server-2,False,12945.261041402817,"{'sample_time_ms': 33817.869, 'num_steps_trained': 420000, 'grad_time_ms': 370.853, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 407.9132995605469, 'policy_loss': -0.16499578952789307, 'vf_explained_var': 0.03471194952726364, 'entropy': 9.714946746826172, 'cur_lr': 4.999999873689376e-05, 'total_loss': 407.78009033203125, 'kl': 0.013945111073553562}, 'load_time_ms': 0.66, 'num_steps_sampled': 420000, 'update_time_ms': 2.544}",350,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.176578760147095,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,420000,420000,{},350,51,-90.00761375590767,2025-09-04_19-48-39,8.000005925901757,3651948,1757008119,-30.900000083980526,12945.261041402817,11181,23.31
+cda-server-2,False,12979.243041276932,"{'sample_time_ms': 33834.387, 'num_steps_trained': 421200, 'grad_time_ms': 371.215, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 537.1176147460938, 'policy_loss': -0.17601019144058228, 'vf_explained_var': 0.02266140840947628, 'entropy': 9.728290557861328, 'cur_lr': 4.999999873689376e-05, 'total_loss': 536.9721069335938, 'kl': 0.01338786631822586}, 'load_time_ms': 0.677, 'num_steps_sampled': 421200, 'update_time_ms': 2.534}",351,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.98199987411499,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,421200,421200,{},351,48,-90.79064585737844,2025-09-04_19-49-13,8.000002448755124,3651948,1757008153,-33.23412642428405,12979.243041276932,11229,24.62
+cda-server-2,False,13014.3508746624,"{'sample_time_ms': 33905.471, 'num_steps_trained': 422400, 'grad_time_ms': 370.991, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 408.62109375, 'policy_loss': -0.16284993290901184, 'vf_explained_var': 0.02897910214960575, 'entropy': 9.529528617858887, 'cur_lr': 4.999999873689376e-05, 'total_loss': 408.4875793457031, 'kl': 0.012877307832241058}, 'load_time_ms': 0.686, 'num_steps_sampled': 422400, 'update_time_ms': 2.538}",352,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",35.10783338546753,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,422400,422400,{},352,51,-90.79064585737844,2025-09-04_19-49-48,8.001778770566665,3651948,1757008188,-32.80922522194475,13014.3508746624,11280,24.32
+cda-server-2,False,13048.411231994629,"{'sample_time_ms': 33894.548, 'num_steps_trained': 423600, 'grad_time_ms': 370.781, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 435.6541442871094, 'policy_loss': -0.16300548613071442, 'vf_explained_var': 0.023583777248859406, 'entropy': 9.866662979125977, 'cur_lr': 4.999999873689376e-05, 'total_loss': 435.5221252441406, 'kl': 0.013616513460874557}, 'load_time_ms': 0.697, 'num_steps_sampled': 423600, 'update_time_ms': 2.544}",353,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.060357332229614,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,423600,423600,{},353,52,-89.77622935353544,2025-09-04_19-50-22,8.001778770566665,3651948,1757008222,-29.7972654921513,13048.411231994629,11332,22.84
+cda-server-2,False,13082.31562924385,"{'sample_time_ms': 33916.361, 'num_steps_trained': 424800, 'grad_time_ms': 371.457, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 426.9761047363281, 'policy_loss': -0.17525018751621246, 'vf_explained_var': 0.03275012969970703, 'entropy': 9.468761444091797, 'cur_lr': 4.999999873689376e-05, 'total_loss': 426.8311462402344, 'kl': 0.013292660936713219}, 'load_time_ms': 0.706, 'num_steps_sampled': 424800, 'update_time_ms': 2.595}",354,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",33.9043972492218,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,424800,424800,{},354,55,-89.38275743036891,2025-09-04_19-50-56,8.002367688676628,3651948,1757008256,-29.41284275895553,13082.31562924385,11387,22.39
+cda-server-2,False,13116.891928434372,"{'sample_time_ms': 33879.38, 'num_steps_trained': 426000, 'grad_time_ms': 371.365, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 413.310302734375, 'policy_loss': -0.16663283109664917, 'vf_explained_var': 0.02166224829852581, 'entropy': 9.784157752990723, 'cur_lr': 4.999999873689376e-05, 'total_loss': 413.1813049316406, 'kl': 0.016536220908164978}, 'load_time_ms': 0.706, 'num_steps_sampled': 426000, 'update_time_ms': 2.631}",355,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.57629919052124,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,426000,426000,{},355,50,-90.10231683526233,2025-09-04_19-51-30,8.002367688676628,3651948,1757008290,-29.290278504249542,13116.891928434372,11437,22.49
+cda-server-2,False,13151.868121147156,"{'sample_time_ms': 33947.937, 'num_steps_trained': 427200, 'grad_time_ms': 372.465, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 486.1282958984375, 'policy_loss': -0.17111438512802124, 'vf_explained_var': 0.014239702373743057, 'entropy': 9.42212200164795, 'cur_lr': 4.999999873689376e-05, 'total_loss': 485.98779296875, 'kl': 0.013429854065179825}, 'load_time_ms': 0.718, 'num_steps_sampled': 427200, 'update_time_ms': 2.635}",356,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.97619271278381,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,427200,427200,{},356,54,-90.10231683526233,2025-09-04_19-52-05,8.000029955293169,3651948,1757008325,-29.83568825574139,13151.868121147156,11491,22.68
+cda-server-2,False,13186.134541034698,"{'sample_time_ms': 33932.595, 'num_steps_trained': 428400, 'grad_time_ms': 374.671, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 427.9323425292969, 'policy_loss': -0.16929209232330322, 'vf_explained_var': 0.01650114171206951, 'entropy': 9.972504615783691, 'cur_lr': 4.999999873689376e-05, 'total_loss': 427.7929992675781, 'kl': 0.013160786591470242}, 'load_time_ms': 0.715, 'num_steps_sampled': 428400, 'update_time_ms': 2.638}",357,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",34.266419887542725,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,428400,428400,{},357,51,-88.82757409968143,2025-09-04_19-52-40,8.000629019591925,3651948,1757008360,-30.30186789684934,13186.134541034698,11542,23.02
+cda-server-2,False,13221.57733464241,"{'sample_time_ms': 34117.193, 'num_steps_trained': 429600, 'grad_time_ms': 373.729, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 398.54595947265625, 'policy_loss': -0.17490650713443756, 'vf_explained_var': 0.026144007220864296, 'entropy': 9.909229278564453, 'cur_lr': 4.999999873689376e-05, 'total_loss': 398.4010925292969, 'kl': 0.01319141685962677}, 'load_time_ms': 0.709, 'num_steps_sampled': 429600, 'update_time_ms': 2.68}",358,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",35.44279360771179,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,429600,429600,{},358,47,-91.63522504734092,2025-09-04_19-53-15,8.000629019591925,3651948,1757008395,-33.281730390489905,13221.57733464241,11589,24.82
+cda-server-2,False,13261.919956684113,"{'sample_time_ms': 34701.109, 'num_steps_trained': 430800, 'grad_time_ms': 374.187, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 452.0755615234375, 'policy_loss': -0.16567420959472656, 'vf_explained_var': 0.030823178589344025, 'entropy': 9.092144966125488, 'cur_lr': 4.999999873689376e-05, 'total_loss': 451.9403991699219, 'kl': 0.01339884102344513}, 'load_time_ms': 0.722, 'num_steps_sampled': 430800, 'update_time_ms': 2.628}",359,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.34262204170227,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,430800,430800,{},359,47,-91.82475131787795,2025-09-04_19-53-55,8.000045128377083,3651948,1757008435,-32.980668447373354,13261.919956684113,11636,24.54
+cda-server-2,False,13302.515436410904,"{'sample_time_ms': 35340.513, 'num_steps_trained': 432000, 'grad_time_ms': 376.642, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 337.7193298339844, 'policy_loss': -0.17150822281837463, 'vf_explained_var': 0.06921210139989853, 'entropy': 9.250479698181152, 'cur_lr': 4.999999873689376e-05, 'total_loss': 337.5768127441406, 'kl': 0.01273456308990717}, 'load_time_ms': 0.724, 'num_steps_sampled': 432000, 'update_time_ms': 2.642}",360,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.59547972679138,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,432000,432000,{},360,52,-91.82475131787795,2025-09-04_19-54-36,6.0000902681365496,3651948,1757008476,-33.06721707520575,13302.515436410904,11688,24.49
+cda-server-2,False,13343.17271733284,"{'sample_time_ms': 36010.026, 'num_steps_trained': 433200, 'grad_time_ms': 374.677, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 418.0478820800781, 'policy_loss': -0.14703959226608276, 'vf_explained_var': 0.011033753864467144, 'entropy': 10.082448959350586, 'cur_lr': 4.999999873689376e-05, 'total_loss': 417.92901611328125, 'kl': 0.012359730899333954}, 'load_time_ms': 0.709, 'num_steps_sampled': 433200, 'update_time_ms': 2.679}",361,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.657280921936035,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,433200,433200,{},361,52,-89.92808180406912,2025-09-04_19-55-17,6.000208357574652,3651948,1757008517,-29.601588486112334,13343.17271733284,11740,22.67
+cda-server-2,False,13383.87813782692,"{'sample_time_ms': 36569.48, 'num_steps_trained': 434400, 'grad_time_ms': 375.044, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 423.71539306640625, 'policy_loss': -0.15861457586288452, 'vf_explained_var': 0.03651801869273186, 'entropy': 9.447165489196777, 'cur_lr': 4.999999873689376e-05, 'total_loss': 423.5886535644531, 'kl': 0.013986926525831223}, 'load_time_ms': 0.719, 'num_steps_sampled': 434400, 'update_time_ms': 2.637}",362,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.70542049407959,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,434400,434400,{},362,50,-89.92808180406912,2025-09-04_19-55-58,8.000000400012878,3651948,1757008558,-30.085141804347266,13383.87813782692,11790,22.97
+cda-server-2,False,13424.643053531647,"{'sample_time_ms': 37240.139, 'num_steps_trained': 435600, 'grad_time_ms': 374.911, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 517.852294921875, 'policy_loss': -0.15835538506507874, 'vf_explained_var': 0.014234628528356552, 'entropy': 9.38388442993164, 'cur_lr': 4.999999873689376e-05, 'total_loss': 517.7247314453125, 'kl': 0.013516398146748543}, 'load_time_ms': 0.704, 'num_steps_sampled': 435600, 'update_time_ms': 2.648}",363,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.76491570472717,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,435600,435600,{},363,51,-90.54177969504582,2025-09-04_19-56-38,8.000000953335292,3651948,1757008598,-32.38749691000124,13424.643053531647,11841,24.02
+cda-server-2,False,13465.295440912247,"{'sample_time_ms': 37915.171, 'num_steps_trained': 436800, 'grad_time_ms': 374.717, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 351.9696350097656, 'policy_loss': -0.16030652821063995, 'vf_explained_var': 0.03496674820780754, 'entropy': 9.348217964172363, 'cur_lr': 4.999999873689376e-05, 'total_loss': 351.8398132324219, 'kl': 0.013371977023780346}, 'load_time_ms': 0.706, 'num_steps_sampled': 436800, 'update_time_ms': 2.635}",364,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.652387380599976,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,436800,436800,{},364,50,-90.54177969504582,2025-09-04_19-57-19,8.000000953335292,3651948,1757008639,-32.644767126636886,13465.295440912247,11891,24.3
+cda-server-2,False,13506.65328836441,"{'sample_time_ms': 38593.073, 'num_steps_trained': 438000, 'grad_time_ms': 375.016, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 417.3665771484375, 'policy_loss': -0.1613299399614334, 'vf_explained_var': 0.03704684227705002, 'entropy': 9.711258888244629, 'cur_lr': 4.999999873689376e-05, 'total_loss': 417.2366027832031, 'kl': 0.013744776137173176}, 'load_time_ms': 0.709, 'num_steps_sampled': 438000, 'update_time_ms': 2.588}",365,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.357847452163696,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,438000,438000,{},365,57,-90.32940556313206,2025-09-04_19-58-00,8.000000515834063,3651948,1757008680,-29.322009071777508,13506.65328836441,11948,22.72
+cda-server-2,False,13547.770455598831,"{'sample_time_ms': 39210.356, 'num_steps_trained': 439200, 'grad_time_ms': 371.913, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 422.9631652832031, 'policy_loss': -0.1678367406129837, 'vf_explained_var': 0.015686094760894775, 'entropy': 9.463945388793945, 'cur_lr': 4.999999873689376e-05, 'total_loss': 422.8275146484375, 'kl': 0.014120825566351414}, 'load_time_ms': 0.694, 'num_steps_sampled': 439200, 'update_time_ms': 2.593}",366,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.117167234420776,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,439200,439200,{},366,51,-90.5094447473157,2025-09-04_19-58-42,8.000001058500953,3651948,1757008722,-28.59939437734714,13547.770455598831,11999,22.21
+cda-server-2,False,13589.815601110458,"{'sample_time_ms': 39991.09, 'num_steps_trained': 440400, 'grad_time_ms': 369.185, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 399.17279052734375, 'policy_loss': -0.16194237768650055, 'vf_explained_var': 0.03682759031653404, 'entropy': 9.822941780090332, 'cur_lr': 4.999999873689376e-05, 'total_loss': 399.04144287109375, 'kl': 0.01344168558716774}, 'load_time_ms': 0.693, 'num_steps_sampled': 440400, 'update_time_ms': 2.552}",367,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.0451455116272,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,440400,440400,{},367,63,-90.5094447473157,2025-09-04_19-59-24,8.000102789460353,3651948,1757008764,-25.033929352215065,13589.815601110458,12062,20.45
+cda-server-2,False,13631.255574464798,"{'sample_time_ms': 40592.363, 'num_steps_trained': 441600, 'grad_time_ms': 367.641, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 358.0179443359375, 'policy_loss': -0.17408449947834015, 'vf_explained_var': 0.042325105518102646, 'entropy': 9.080735206604004, 'cur_lr': 4.999999873689376e-05, 'total_loss': 357.87310791015625, 'kl': 0.01283906027674675}, 'load_time_ms': 0.694, 'num_steps_sampled': 441600, 'update_time_ms': 2.485}",368,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.4399733543396,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,441600,441600,{},368,65,-87.88851058948934,2025-09-04_20-00-05,8.000062048492094,3651948,1757008805,-24.182147381921666,13631.255574464798,12127,19.84
+cda-server-2,False,13672.156474590302,"{'sample_time_ms': 40650.12, 'num_steps_trained': 442800, 'grad_time_ms': 365.681, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 454.3436584472656, 'policy_loss': -0.16536158323287964, 'vf_explained_var': 0.009322080761194229, 'entropy': 9.624226570129395, 'cur_lr': 4.999999873689376e-05, 'total_loss': 454.2071838378906, 'kl': 0.012673533521592617}, 'load_time_ms': 0.682, 'num_steps_sampled': 442800, 'update_time_ms': 2.532}",369,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.90090012550354,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,442800,442800,{},369,44,-87.88851058948934,2025-09-04_20-00-46,8.000062048492094,3651948,1757008846,-28.460736643779697,13672.156474590302,12171,22.0
+cda-server-2,False,13713.61516404152,"{'sample_time_ms': 40736.241, 'num_steps_trained': 444000, 'grad_time_ms': 365.856, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 368.1820373535156, 'policy_loss': -0.1646040827035904, 'vf_explained_var': 0.02997763268649578, 'entropy': 9.514936447143555, 'cur_lr': 4.999999873689376e-05, 'total_loss': 368.04901123046875, 'kl': 0.01386441383510828}, 'load_time_ms': 0.686, 'num_steps_sampled': 444000, 'update_time_ms': 2.515}",370,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.45868945121765,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,444000,444000,{},370,55,-87.94120920451901,2025-09-04_20-01-27,8.000000643938543,3651948,1757008887,-32.296061636067925,13713.61516404152,12226,24.11
+cda-server-2,False,13754.44764304161,"{'sample_time_ms': 40752.545, 'num_steps_trained': 445200, 'grad_time_ms': 367.047, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 405.9430847167969, 'policy_loss': -0.1600293219089508, 'vf_explained_var': 0.024258123710751534, 'entropy': 9.467537879943848, 'cur_lr': 4.999999873689376e-05, 'total_loss': 405.8106994628906, 'kl': 0.012127561494708061}, 'load_time_ms': 0.697, 'num_steps_sampled': 445200, 'update_time_ms': 2.503}",371,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.83247900009155,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,445200,445200,{},371,55,-87.94120920451901,2025-09-04_20-02-08,8.000144909312752,3651948,1757008928,-28.713782958618218,13754.44764304161,12281,22.24
+cda-server-2,False,13795.328585147858,"{'sample_time_ms': 40770.655, 'num_steps_trained': 446400, 'grad_time_ms': 366.513, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 456.3870544433594, 'policy_loss': -0.1620740294456482, 'vf_explained_var': 0.01017056591808796, 'entropy': 9.62911605834961, 'cur_lr': 4.999999873689376e-05, 'total_loss': 456.2552185058594, 'kl': 0.013279477134346962}, 'load_time_ms': 0.684, 'num_steps_sampled': 446400, 'update_time_ms': 2.523}",372,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.88094210624695,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,446400,446400,{},372,58,-89.48098546853737,2025-09-04_20-02-49,8.000144909312752,3651948,1757008969,-25.558417428568774,13795.328585147858,12339,20.73
+cda-server-2,False,13836.142918586731,"{'sample_time_ms': 40775.215, 'num_steps_trained': 447600, 'grad_time_ms': 366.855, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 414.3948059082031, 'policy_loss': -0.16606897115707397, 'vf_explained_var': 0.0313444547355175, 'entropy': 9.408416748046875, 'cur_lr': 4.999999873689376e-05, 'total_loss': 414.2598876953125, 'kl': 0.013675946742296219}, 'load_time_ms': 0.693, 'num_steps_sampled': 447600, 'update_time_ms': 2.502}",373,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.81433343887329,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,447600,447600,{},373,55,-89.48098546853737,2025-09-04_20-03-30,8.000030911466938,3651948,1757009010,-26.950057909762915,13836.142918586731,12394,21.42
+cda-server-2,False,13876.879835128784,"{'sample_time_ms': 40783.808, 'num_steps_trained': 448800, 'grad_time_ms': 366.716, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 429.29058837890625, 'policy_loss': -0.16602841019630432, 'vf_explained_var': 0.02585785835981369, 'entropy': 9.908772468566895, 'cur_lr': 4.999999873689376e-05, 'total_loss': 429.1557312011719, 'kl': 0.01369208749383688}, 'load_time_ms': 0.681, 'num_steps_sampled': 448800, 'update_time_ms': 2.509}",374,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.73691654205322,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,448800,448800,{},374,46,-88.70048957065029,2025-09-04_20-04-11,8.00000041171073,3651948,1757009051,-31.440366763057263,13876.879835128784,12440,23.98
+cda-server-2,False,13917.71957039833,"{'sample_time_ms': 40732.519, 'num_steps_trained': 450000, 'grad_time_ms': 366.135, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 419.1043395996094, 'policy_loss': -0.16186374425888062, 'vf_explained_var': 0.018550297245383263, 'entropy': 9.463047981262207, 'cur_lr': 4.999999873689376e-05, 'total_loss': 418.9734802246094, 'kl': 0.01360340416431427}, 'load_time_ms': 0.688, 'num_steps_sampled': 450000, 'update_time_ms': 2.533}",375,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.83973526954651,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,450000,450000,{},375,66,-88.70048957065029,2025-09-04_20-04-52,8.000211419224676,3651948,1757009092,-26.563536613376524,13917.71957039833,12506,21.25
+cda-server-2,False,13958.300779104233,"{'sample_time_ms': 40678.661, 'num_steps_trained': 451200, 'grad_time_ms': 366.376, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 341.96826171875, 'policy_loss': -0.16432610154151917, 'vf_explained_var': 0.058409616351127625, 'entropy': 9.503949165344238, 'cur_lr': 4.999999873689376e-05, 'total_loss': 341.83319091796875, 'kl': 0.012845533899962902}, 'load_time_ms': 0.69, 'num_steps_sampled': 451200, 'update_time_ms': 2.547}",376,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.5812087059021,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,451200,451200,{},376,51,-88.40622192230724,2025-09-04_20-05-32,8.000211419224676,3651948,1757009132,-26.488740991512877,13958.300779104233,12557,21.14
+cda-server-2,False,13998.84181547165,"{'sample_time_ms': 40527.821, 'num_steps_trained': 452400, 'grad_time_ms': 366.813, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 400.8475036621094, 'policy_loss': -0.14988702535629272, 'vf_explained_var': 0.022728238254785538, 'entropy': 9.669713973999023, 'cur_lr': 4.999999873689376e-05, 'total_loss': 400.728515625, 'kl': 0.013556399382650852}, 'load_time_ms': 0.696, 'num_steps_sampled': 452400, 'update_time_ms': 2.555}",377,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.54103636741638,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,452400,452400,{},377,55,-90.1056422956086,2025-09-04_20-06-13,8.00056570814457,3651948,1757009173,-29.022988951470996,13998.84181547165,12612,22.47
+cda-server-2,False,14039.874541521072,"{'sample_time_ms': 40484.34, 'num_steps_trained': 453600, 'grad_time_ms': 369.549, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 316.0495910644531, 'policy_loss': -0.1594245731830597, 'vf_explained_var': 0.042283281683921814, 'entropy': 9.659835815429688, 'cur_lr': 4.999999873689376e-05, 'total_loss': 315.9207458496094, 'kl': 0.013414965011179447}, 'load_time_ms': 0.696, 'num_steps_sampled': 453600, 'update_time_ms': 2.589}",378,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.03272604942322,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,453600,453600,{},378,57,-90.1056422956086,2025-09-04_20-06-54,8.00056570814457,3651948,1757009214,-26.929301530074344,14039.874541521072,12669,21.38
+cda-server-2,False,14080.765124797821,"{'sample_time_ms': 40480.4, 'num_steps_trained': 454800, 'grad_time_ms': 372.458, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 435.4676818847656, 'policy_loss': -0.1634068638086319, 'vf_explained_var': 0.011056158691644669, 'entropy': 9.445538520812988, 'cur_lr': 4.999999873689376e-05, 'total_loss': 435.33685302734375, 'kl': 0.014295559376478195}, 'load_time_ms': 0.698, 'num_steps_sampled': 454800, 'update_time_ms': 2.585}",379,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.89058327674866,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,454800,454800,{},379,52,-87.64420739712868,2025-09-04_20-07-35,8.000242219782981,3651948,1757009255,-28.247261511853285,14080.765124797821,12721,22.07
+cda-server-2,False,14121.779526948929,"{'sample_time_ms': 40437.328, 'num_steps_trained': 456000, 'grad_time_ms': 371.061, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 386.6919250488281, 'policy_loss': -0.1640903651714325, 'vf_explained_var': 0.04491328448057175, 'entropy': 9.564139366149902, 'cur_lr': 4.999999873689376e-05, 'total_loss': 386.5631103515625, 'kl': 0.015498373657464981}, 'load_time_ms': 0.684, 'num_steps_sampled': 456000, 'update_time_ms': 2.621}",380,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.01440215110779,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,456000,456000,{},380,58,-88.34600534138386,2025-09-04_20-08-16,8.000242219782981,3651948,1757009296,-26.5321176185136,14121.779526948929,12779,21.23
+cda-server-2,False,14163.406922578812,"{'sample_time_ms': 40516.053, 'num_steps_trained': 457200, 'grad_time_ms': 371.811, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 426.4010009765625, 'policy_loss': -0.1595669388771057, 'vf_explained_var': 0.0193032156676054, 'entropy': 9.629157066345215, 'cur_lr': 4.999999873689376e-05, 'total_loss': 426.2713623046875, 'kl': 0.013127539306879044}, 'load_time_ms': 0.675, 'num_steps_sampled': 457200, 'update_time_ms': 2.618}",381,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.62739562988281,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,457200,457200,{},381,55,-88.34600534138386,2025-09-04_20-08-58,8.000000676992856,3651948,1757009338,-26.956296022717453,14163.406922578812,12834,21.46
+cda-server-2,False,14204.033453941345,"{'sample_time_ms': 40489.187, 'num_steps_trained': 458400, 'grad_time_ms': 373.168, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 369.1617736816406, 'policy_loss': -0.16840365529060364, 'vf_explained_var': 0.03676861524581909, 'entropy': 9.394706726074219, 'cur_lr': 4.999999873689376e-05, 'total_loss': 369.0248718261719, 'kl': 0.01381840929389}, 'load_time_ms': 0.697, 'num_steps_sampled': 458400, 'update_time_ms': 2.656}",382,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.62653136253357,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,458400,458400,{},382,60,-91.67659381090598,2025-09-04_20-09-38,8.000002013209494,3651948,1757009378,-24.28655712964201,14204.033453941345,12894,20.16
+cda-server-2,False,14244.760761737823,"{'sample_time_ms': 40481.404, 'num_steps_trained': 459600, 'grad_time_ms': 372.245, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 454.9277648925781, 'policy_loss': -0.17204974591732025, 'vf_explained_var': 0.0228937529027462, 'entropy': 9.530159950256348, 'cur_lr': 4.999999873689376e-05, 'total_loss': 454.7872619628906, 'kl': 0.013844618573784828}, 'load_time_ms': 0.7, 'num_steps_sampled': 459600, 'update_time_ms': 2.673}",383,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.72730779647827,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,459600,459600,{},383,46,-91.67659381090598,2025-09-04_20-10-19,8.000002013209494,3651948,1757009419,-30.569119944998008,14244.760761737823,12940,23.15
+cda-server-2,False,14285.703725337982,"{'sample_time_ms': 40503.776, 'num_steps_trained': 460800, 'grad_time_ms': 370.483, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 379.1144714355469, 'policy_loss': -0.1502944678068161, 'vf_explained_var': 0.02822817675769329, 'entropy': 9.340314865112305, 'cur_lr': 4.999999873689376e-05, 'total_loss': 378.99749755859375, 'kl': 0.014621545560657978}, 'load_time_ms': 0.712, 'num_steps_sampled': 460800, 'update_time_ms': 2.648}",384,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.94296360015869,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,460800,460800,{},384,53,-89.63771809272924,2025-09-04_20-11-00,8.000121973071964,3651948,1757009460,-31.947966056254174,14285.703725337982,12993,24.02
+cda-server-2,False,14326.910396814346,"{'sample_time_ms': 40538.964, 'num_steps_trained': 462000, 'grad_time_ms': 372.018, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 427.7789001464844, 'policy_loss': -0.1649748533964157, 'vf_explained_var': 0.02137676253914833, 'entropy': 8.925230026245117, 'cur_lr': 4.999999873689376e-05, 'total_loss': 427.64898681640625, 'kl': 0.015390059910714626}, 'load_time_ms': 0.702, 'num_steps_sampled': 462000, 'update_time_ms': 2.624}",385,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.206671476364136,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,462000,462000,{},385,60,-87.39165532447993,2025-09-04_20-11-41,8.000121973071964,3651948,1757009501,-25.95266022397698,14326.910396814346,13053,20.98
+cda-server-2,False,14367.425111293793,"{'sample_time_ms': 40531.064, 'num_steps_trained': 463200, 'grad_time_ms': 373.23, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 405.1592102050781, 'policy_loss': -0.16958096623420715, 'vf_explained_var': 0.03342774137854576, 'entropy': 9.57955265045166, 'cur_lr': 4.999999873689376e-05, 'total_loss': 405.0227966308594, 'kl': 0.014559010975062847}, 'load_time_ms': 0.708, 'num_steps_sampled': 463200, 'update_time_ms': 2.611}",386,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.51471447944641,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,463200,463200,{},386,50,-87.86350126831186,2025-09-04_20-12-22,6.0000964397704015,3651948,1757009542,-28.921221890397575,14367.425111293793,13103,22.53
+cda-server-2,False,14409.479347467422,"{'sample_time_ms': 40682.346, 'num_steps_trained': 464400, 'grad_time_ms': 373.202, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 447.49566650390625, 'policy_loss': -0.16376593708992004, 'vf_explained_var': 0.007484721019864082, 'entropy': 9.31185531616211, 'cur_lr': 4.999999873689376e-05, 'total_loss': 447.3653259277344, 'kl': 0.014663223177194595}, 'load_time_ms': 0.706, 'num_steps_sampled': 464400, 'update_time_ms': 2.635}",387,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.05423617362976,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,464400,464400,{},387,51,-89.42840434782649,2025-09-04_20-13-04,8.000000418458303,3651948,1757009584,-31.67022643060836,14409.479347467422,13154,23.57
+cda-server-2,False,14450.809123754501,"{'sample_time_ms': 40711.965, 'num_steps_trained': 465600, 'grad_time_ms': 373.285, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 395.99224853515625, 'policy_loss': -0.16208644211292267, 'vf_explained_var': 0.028500132262706757, 'entropy': 8.899362564086914, 'cur_lr': 4.999999873689376e-05, 'total_loss': 395.8634033203125, 'kl': 0.014577767811715603}, 'load_time_ms': 0.72, 'num_steps_sampled': 465600, 'update_time_ms': 2.638}",388,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.32977628707886,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,465600,465600,{},388,67,-90.16215763428154,2025-09-04_20-13-45,8.000110995598888,3651948,1757009625,-25.940163846362545,14450.809123754501,13221,20.32
+cda-server-2,False,14491.643397331238,"{'sample_time_ms': 40709.286, 'num_steps_trained': 466800, 'grad_time_ms': 370.361, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 403.9291076660156, 'policy_loss': -0.16290004551410675, 'vf_explained_var': 0.01675303839147091, 'entropy': 9.276033401489258, 'cur_lr': 4.999999873689376e-05, 'total_loss': 403.7976989746094, 'kl': 0.013814728707075119}, 'load_time_ms': 0.723, 'num_steps_sampled': 466800, 'update_time_ms': 2.613}",389,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.83427357673645,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,466800,466800,{},389,53,-90.16215763428154,2025-09-04_20-14-26,8.00134407488727,3651948,1757009666,-22.927386873399293,14491.643397331238,13274,19.03
+cda-server-2,False,14532.366207122803,"{'sample_time_ms': 40681.65, 'num_steps_trained': 468000, 'grad_time_ms': 368.945, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 429.4143371582031, 'policy_loss': -0.16146515309810638, 'vf_explained_var': 0.026131371036171913, 'entropy': 9.468842506408691, 'cur_lr': 4.999999873689376e-05, 'total_loss': 429.2826843261719, 'kl': 0.013080236501991749}, 'load_time_ms': 0.723, 'num_steps_sampled': 468000, 'update_time_ms': 2.594}",390,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.72280979156494,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,468000,468000,{},390,45,-90.54016901974268,2025-09-04_20-15-07,8.00134407488727,3651948,1757009707,-30.35186924277821,14532.366207122803,13319,23.24
+cda-server-2,False,14573.639463424683,"{'sample_time_ms': 40646.132, 'num_steps_trained': 469200, 'grad_time_ms': 369.035, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 487.1026916503906, 'policy_loss': -0.1664435863494873, 'vf_explained_var': 0.027938902378082275, 'entropy': 9.025018692016602, 'cur_lr': 4.999999873689376e-05, 'total_loss': 486.9728088378906, 'kl': 0.0160539373755455}, 'load_time_ms': 0.729, 'num_steps_sampled': 469200, 'update_time_ms': 2.573}",391,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.27325630187988,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,469200,469200,{},391,60,-90.54016901974268,2025-09-04_20-15-48,8.000111657128002,3651948,1757009748,-30.182975917074646,14573.639463424683,13379,22.94
+cda-server-2,False,14614.474596261978,"{'sample_time_ms': 40667.889, 'num_steps_trained': 470400, 'grad_time_ms': 368.189, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 321.1077575683594, 'policy_loss': -0.164377361536026, 'vf_explained_var': 0.019441213458776474, 'entropy': 9.312178611755371, 'cur_lr': 4.999999873689376e-05, 'total_loss': 320.9761962890625, 'kl': 0.014411866664886475}, 'load_time_ms': 0.709, 'num_steps_sampled': 470400, 'update_time_ms': 2.529}",392,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.83513283729553,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,470400,470400,{},392,61,-88.94411633292573,2025-09-04_20-16-29,8.00027034236108,3651948,1757009789,-22.698768921422488,14614.474596261978,13440,19.31
+cda-server-2,False,14655.467163801193,"{'sample_time_ms': 40696.22, 'num_steps_trained': 471600, 'grad_time_ms': 366.417, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 371.70098876953125, 'policy_loss': -0.16502158343791962, 'vf_explained_var': 0.02959388494491577, 'entropy': 9.293415069580078, 'cur_lr': 4.999999873689376e-05, 'total_loss': 371.5666809082031, 'kl': 0.013496254570782185}, 'load_time_ms': 0.705, 'num_steps_sampled': 471600, 'update_time_ms': 2.528}",393,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.99256753921509,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,471600,471600,{},393,63,-87.27054088007134,2025-09-04_20-17-10,8.002463732136958,3651948,1757009830,-22.09301271758532,14655.467163801193,13503,19.13
+cda-server-2,False,14696.118603467941,"{'sample_time_ms': 40666.56, 'num_steps_trained': 472800, 'grad_time_ms': 366.908, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 288.21826171875, 'policy_loss': -0.1636689454317093, 'vf_explained_var': 0.041789062321186066, 'entropy': 9.057210922241211, 'cur_lr': 4.999999873689376e-05, 'total_loss': 288.0855712890625, 'kl': 0.013619553297758102}, 'load_time_ms': 0.703, 'num_steps_sampled': 472800, 'update_time_ms': 2.504}",394,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.65143966674805,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,472800,472800,{},394,65,-87.27054088007134,2025-09-04_20-17-51,8.002463732136958,3651948,1757009871,-20.577291466937563,14696.118603467941,13568,18.27
+cda-server-2,False,14737.008077859879,"{'sample_time_ms': 40635.738, 'num_steps_trained': 474000, 'grad_time_ms': 366.008, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 371.5557861328125, 'policy_loss': -0.15123039484024048, 'vf_explained_var': 0.026034004986286163, 'entropy': 8.815576553344727, 'cur_lr': 4.999999873689376e-05, 'total_loss': 371.4381103515625, 'kl': 0.014736750163137913}, 'load_time_ms': 0.705, 'num_steps_sampled': 474000, 'update_time_ms': 2.528}",395,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.889474391937256,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,474000,474000,{},395,65,-88.93210368807651,2025-09-04_20-18-31,8.000782773261495,3651948,1757009911,-21.1190401483267,14737.008077859879,13633,18.36
+cda-server-2,False,14778.32446694374,"{'sample_time_ms': 40715.737, 'num_steps_trained': 475200, 'grad_time_ms': 366.141, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 367.843017578125, 'policy_loss': -0.1676916778087616, 'vf_explained_var': 0.031155500560998917, 'entropy': 9.684024810791016, 'cur_lr': 4.999999873689376e-05, 'total_loss': 367.703857421875, 'kl': 0.01251928135752678}, 'load_time_ms': 0.699, 'num_steps_sampled': 475200, 'update_time_ms': 2.533}",396,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.316389083862305,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,475200,475200,{},396,57,-93.22473450272642,2025-09-04_20-19-13,8.000526899887358,3651948,1757009953,-24.300092952611994,14778.32446694374,13690,19.97
+cda-server-2,False,14819.988945007324,"{'sample_time_ms': 40676.638, 'num_steps_trained': 476400, 'grad_time_ms': 366.285, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 365.5447082519531, 'policy_loss': -0.17106308043003082, 'vf_explained_var': 0.04816317558288574, 'entropy': 9.278183937072754, 'cur_lr': 4.999999873689376e-05, 'total_loss': 365.40704345703125, 'kl': 0.014639385975897312}, 'load_time_ms': 0.698, 'num_steps_sampled': 476400, 'update_time_ms': 2.542}",397,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.664478063583374,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,476400,476400,{},397,55,-87.72401504139262,2025-09-04_20-19-54,8.000040918922195,3651948,1757009994,-25.846434901990747,14819.988945007324,13745,20.99
+cda-server-2,False,14860.932942867279,"{'sample_time_ms': 40638.65, 'num_steps_trained': 477600, 'grad_time_ms': 365.779, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 445.75152587890625, 'policy_loss': -0.1681382954120636, 'vf_explained_var': 0.036400895565748215, 'entropy': 9.017210006713867, 'cur_lr': 4.999999873689376e-05, 'total_loss': 445.6226806640625, 'kl': 0.017247028648853302}, 'load_time_ms': 0.681, 'num_steps_sampled': 477600, 'update_time_ms': 2.507}",398,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.943997859954834,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,477600,477600,{},398,58,-89.13693164817103,2025-09-04_20-20-35,8.000040918922195,3651948,1757010035,-26.30384903736493,14860.932942867279,13803,20.94
+cda-server-2,False,14903.178161382675,"{'sample_time_ms': 40776.198, 'num_steps_trained': 478800, 'grad_time_ms': 369.258, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 325.0050048828125, 'policy_loss': -0.1537511646747589, 'vf_explained_var': 0.05926014482975006, 'entropy': 9.142744064331055, 'cur_lr': 4.999999873689376e-05, 'total_loss': 324.8810119628906, 'kl': 0.013057458214461803}, 'load_time_ms': 0.688, 'num_steps_sampled': 478800, 'update_time_ms': 2.53}",399,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.24521851539612,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,478800,478800,{},399,74,-89.13693164817103,2025-09-04_20-21-18,8.000100239433214,3651948,1757010078,-20.542233135222364,14903.178161382675,13877,17.71
+cda-server-2,False,14944.027264595032,"{'sample_time_ms': 40786.85, 'num_steps_trained': 480000, 'grad_time_ms': 371.183, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 363.21917724609375, 'policy_loss': -0.16554878652095795, 'vf_explained_var': 0.03735869377851486, 'entropy': 8.781224250793457, 'cur_lr': 4.999999873689376e-05, 'total_loss': 363.0849914550781, 'kl': 0.013765843585133553}, 'load_time_ms': 0.697, 'num_steps_sampled': 480000, 'update_time_ms': 2.524}",400,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.84910321235657,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,480000,480000,{},400,56,-89.46152612989916,2025-09-04_20-21-59,8.00016840275882,3651948,1757010119,-21.505756565397533,14944.027264595032,13933,18.4
+cda-server-2,False,14985.088542938232,"{'sample_time_ms': 40764.756, 'num_steps_trained': 481200, 'grad_time_ms': 372.139, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 384.45416259765625, 'policy_loss': -0.16606341302394867, 'vf_explained_var': 0.03621109947562218, 'entropy': 8.954419136047363, 'cur_lr': 4.999999873689376e-05, 'total_loss': 384.3221435546875, 'kl': 0.014945581555366516}, 'load_time_ms': 0.708, 'num_steps_sampled': 481200, 'update_time_ms': 2.527}",401,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.061278343200684,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,481200,481200,{},401,62,-88.78279717902913,2025-09-04_20-22-40,8.000649660237048,3651948,1757010160,-22.178985335804878,14985.088542938232,13995,18.94
+cda-server-2,False,15026.196497917175,"{'sample_time_ms': 40794.298, 'num_steps_trained': 482400, 'grad_time_ms': 369.928, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 398.8265380859375, 'policy_loss': -0.1774124801158905, 'vf_explained_var': 0.029825767502188683, 'entropy': 9.167211532592773, 'cur_lr': 4.999999873689376e-05, 'total_loss': 398.6807861328125, 'kl': 0.013916068710386753}, 'load_time_ms': 0.711, 'num_steps_sampled': 482400, 'update_time_ms': 2.526}",402,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.10795497894287,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,482400,482400,{},402,57,-88.28649652581946,2025-09-04_20-23-21,8.000649660237048,3651948,1757010201,-25.26251572338901,15026.196497917175,14052,20.54
+cda-server-2,False,15066.95999789238,"{'sample_time_ms': 40770.654, 'num_steps_trained': 483600, 'grad_time_ms': 370.621, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 468.6005859375, 'policy_loss': -0.15460431575775146, 'vf_explained_var': 0.013424217700958252, 'entropy': 8.968843460083008, 'cur_lr': 4.999999873689376e-05, 'total_loss': 468.4796142578125, 'kl': 0.01474261749535799}, 'load_time_ms': 0.713, 'num_steps_sampled': 483600, 'update_time_ms': 2.513}",403,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.76349997520447,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,483600,483600,{},403,64,-90.62365731373188,2025-09-04_20-24-02,8.000064498918023,3651948,1757010242,-26.25160861817275,15066.95999789238,14116,20.91
+cda-server-2,False,15107.76928973198,"{'sample_time_ms': 40784.239, 'num_steps_trained': 484800, 'grad_time_ms': 372.769, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 341.62896728515625, 'policy_loss': -0.15651313960552216, 'vf_explained_var': 0.02480602264404297, 'entropy': 9.093782424926758, 'cur_lr': 4.999999873689376e-05, 'total_loss': 341.5036926269531, 'kl': 0.01371256448328495}, 'load_time_ms': 0.716, 'num_steps_sampled': 484800, 'update_time_ms': 2.562}",404,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.80929183959961,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,484800,484800,{},404,57,-90.62365731373188,2025-09-04_20-24-42,8.000064498918023,3651948,1757010282,-22.69270314497754,15107.76928973198,14173,19.11
+cda-server-2,False,15149.706801652908,"{'sample_time_ms': 40887.744, 'num_steps_trained': 486000, 'grad_time_ms': 374.035, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 356.63665771484375, 'policy_loss': -0.1520189642906189, 'vf_explained_var': 0.04524644836783409, 'entropy': 8.970779418945312, 'cur_lr': 4.999999873689376e-05, 'total_loss': 356.5178527832031, 'kl': 0.014575008302927017}, 'load_time_ms': 0.709, 'num_steps_sampled': 486000, 'update_time_ms': 2.593}",405,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.937511920928955,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,486000,486000,{},405,66,-87.43413320958629,2025-09-04_20-25-24,8.000628943879118,3651948,1757010324,-23.06051815945549,15149.706801652908,14239,19.23
+cda-server-2,False,15190.235967874527,"{'sample_time_ms': 40807.353, 'num_steps_trained': 487200, 'grad_time_ms': 375.696, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 338.1393737792969, 'policy_loss': -0.155076265335083, 'vf_explained_var': 0.046628501266241074, 'entropy': 8.94325065612793, 'cur_lr': 4.999999873689376e-05, 'total_loss': 338.01953125, 'kl': 0.015466567128896713}, 'load_time_ms': 0.712, 'num_steps_sampled': 487200, 'update_time_ms': 2.602}",406,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.52916622161865,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,487200,487200,{},406,59,-90.0509973386746,2025-09-04_20-26-05,8.000186777192573,3651948,1757010365,-23.565941168375375,15190.235967874527,14298,19.52
+cda-server-2,False,15231.009989500046,"{'sample_time_ms': 40718.354, 'num_steps_trained': 488400, 'grad_time_ms': 375.663, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 379.54412841796875, 'policy_loss': -0.17857927083969116, 'vf_explained_var': 0.02515769749879837, 'entropy': 9.250743865966797, 'cur_lr': 4.999999873689376e-05, 'total_loss': 379.3961486816406, 'kl': 0.013426919467747211}, 'load_time_ms': 0.715, 'num_steps_sampled': 488400, 'update_time_ms': 2.587}",407,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.7740216255188,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,488400,488400,{},407,60,-90.0509973386746,2025-09-04_20-26-46,8.000000718700344,3651948,1757010406,-23.898297838288936,15231.009989500046,14358,19.94
+cda-server-2,False,15271.720307350159,"{'sample_time_ms': 40693.749, 'num_steps_trained': 489600, 'grad_time_ms': 376.866, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 342.9323425292969, 'policy_loss': -0.1711226999759674, 'vf_explained_var': 0.032989416271448135, 'entropy': 9.607444763183594, 'cur_lr': 4.999999873689376e-05, 'total_loss': 342.7926025390625, 'kl': 0.01377950981259346}, 'load_time_ms': 0.73, 'num_steps_sampled': 489600, 'update_time_ms': 2.596}",408,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.710317850112915,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,489600,489600,{},408,55,-88.34572210914138,2025-09-04_20-27-26,8.001231774066822,3651948,1757010446,-24.977491475832686,15271.720307350159,14413,20.5
+cda-server-2,False,15313.139620065689,"{'sample_time_ms': 40614.501, 'num_steps_trained': 490800, 'grad_time_ms': 373.588, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 357.11383056640625, 'policy_loss': -0.155589297413826, 'vf_explained_var': 0.04398070275783539, 'entropy': 9.158653259277344, 'cur_lr': 4.999999873689376e-05, 'total_loss': 356.9899597167969, 'kl': 0.013927659951150417}, 'load_time_ms': 0.721, 'num_steps_sampled': 490800, 'update_time_ms': 2.567}",409,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.419312715530396,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,490800,490800,{},409,65,-88.34572210914138,2025-09-04_20-28-08,8.001232736280405,3651948,1757010488,-22.277063366041194,15313.139620065689,14478,19.01
+cda-server-2,False,15354.477598190308,"{'sample_time_ms': 40662.644, 'num_steps_trained': 492000, 'grad_time_ms': 374.315, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 337.18524169921875, 'policy_loss': -0.15743833780288696, 'vf_explained_var': 0.04267461597919464, 'entropy': 8.85062026977539, 'cur_lr': 4.999999873689376e-05, 'total_loss': 337.0594482421875, 'kl': 0.013875171542167664}, 'load_time_ms': 0.721, 'num_steps_sampled': 492000, 'update_time_ms': 2.599}",410,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.33797812461853,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,492000,492000,{},410,65,-86.17369014343335,2025-09-04_20-28-49,8.001232736280405,3651948,1757010529,-21.66543818727694,15354.477598190308,14543,18.7
+cda-server-2,False,15395.615855932236,"{'sample_time_ms': 40672.516, 'num_steps_trained': 493200, 'grad_time_ms': 372.115, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 397.2659912109375, 'policy_loss': -0.1600235104560852, 'vf_explained_var': 0.027716312557458878, 'entropy': 9.102761268615723, 'cur_lr': 4.999999873689376e-05, 'total_loss': 397.1380615234375, 'kl': 0.014104213565587997}, 'load_time_ms': 0.707, 'num_steps_sampled': 493200, 'update_time_ms': 2.616}",411,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.1382577419281,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,493200,493200,{},411,58,-90.11870080925362,2025-09-04_20-29-30,8.000309162350467,3651948,1757010570,-22.313247212803297,15395.615855932236,14601,19.06
+cda-server-2,False,15436.477092981339,"{'sample_time_ms': 40645.312, 'num_steps_trained': 494400, 'grad_time_ms': 374.636, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 257.9275207519531, 'policy_loss': -0.17039088904857635, 'vf_explained_var': 0.04173828661441803, 'entropy': 8.86276626586914, 'cur_lr': 4.999999873689376e-05, 'total_loss': 257.7919616699219, 'kl': 0.015289144590497017}, 'load_time_ms': 0.698, 'num_steps_sampled': 494400, 'update_time_ms': 2.619}",412,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.86123704910278,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,494400,494400,{},412,59,-90.11870080925362,2025-09-04_20-30-11,8.000309162350467,3651948,1757010611,-23.694892449798267,15436.477092981339,14660,19.97
+cda-server-2,False,15477.587541103363,"{'sample_time_ms': 40678.015, 'num_steps_trained': 495600, 'grad_time_ms': 376.678, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 406.9647521972656, 'policy_loss': -0.16801682114601135, 'vf_explained_var': 0.02449742890894413, 'entropy': 9.003397941589355, 'cur_lr': 4.999999873689376e-05, 'total_loss': 406.8297424316406, 'kl': 0.014499634504318237}, 'load_time_ms': 0.694, 'num_steps_sampled': 495600, 'update_time_ms': 2.619}",413,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.110448122024536,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,495600,495600,{},413,64,-89.96225009423195,2025-09-04_20-30-52,8.000000418517125,3651948,1757010652,-22.641117558423833,15477.587541103363,14724,19.24
+cda-server-2,False,15518.879135847092,"{'sample_time_ms': 40728.988, 'num_steps_trained': 496800, 'grad_time_ms': 374.005, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 360.8838195800781, 'policy_loss': -0.16797587275505066, 'vf_explained_var': 0.02211601845920086, 'entropy': 8.923038482666016, 'cur_lr': 4.999999873689376e-05, 'total_loss': 360.7455139160156, 'kl': 0.013053220696747303}, 'load_time_ms': 0.683, 'num_steps_sampled': 496800, 'update_time_ms': 2.605}",414,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.29159474372864,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,496800,496800,{},414,70,-89.29466862761319,2025-09-04_20-31-34,8.000049041274412,3651948,1757010694,-20.614226509945286,15518.879135847092,14794,18.07
+cda-server-2,False,15559.707585334778,"{'sample_time_ms': 40619.13, 'num_steps_trained': 498000, 'grad_time_ms': 372.965, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 336.3121337890625, 'policy_loss': -0.1546928435564041, 'vf_explained_var': 0.039485231041908264, 'entropy': 8.942896842956543, 'cur_lr': 4.999999873689376e-05, 'total_loss': 336.1910095214844, 'kl': 0.014735047705471516}, 'load_time_ms': 0.689, 'num_steps_sampled': 498000, 'update_time_ms': 2.54}",415,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.82844948768616,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,498000,498000,{},415,50,-88.01235413526035,2025-09-04_20-32-15,8.000049041274412,3651948,1757010735,-23.266062617247893,15559.707585334778,14844,19.66
+cda-server-2,False,15600.588601827621,"{'sample_time_ms': 40655.156, 'num_steps_trained': 499200, 'grad_time_ms': 372.174, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 329.1138916015625, 'policy_loss': -0.15749989449977875, 'vf_explained_var': 0.026625534519553185, 'entropy': 8.95058536529541, 'cur_lr': 4.999999873689376e-05, 'total_loss': 328.990478515625, 'kl': 0.01498242374509573}, 'load_time_ms': 0.686, 'num_steps_sampled': 499200, 'update_time_ms': 2.551}",416,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.88101649284363,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,499200,499200,{},416,55,-88.01235413526035,2025-09-04_20-32-55,8.000000400009691,3651948,1757010775,-29.239176798381415,15600.588601827621,14899,22.97
+cda-server-2,False,15642.138316392899,"{'sample_time_ms': 40730.243, 'num_steps_trained': 500400, 'grad_time_ms': 374.539, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 272.2426452636719, 'policy_loss': -0.15297353267669678, 'vf_explained_var': 0.03704400733113289, 'entropy': 8.66805362701416, 'cur_lr': 4.999999873689376e-05, 'total_loss': 272.1225891113281, 'kl': 0.014451836235821247}, 'load_time_ms': 0.688, 'num_steps_sampled': 500400, 'update_time_ms': 2.568}",417,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.5497145652771,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,500400,500400,{},417,76,-88.21630131251572,2025-09-04_20-33-37,8.000073497850853,3651948,1757010817,-17.679283504079056,15642.138316392899,14975,16.74
+cda-server-2,False,15683.509728908539,"{'sample_time_ms': 40799.427, 'num_steps_trained': 501600, 'grad_time_ms': 371.44, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 248.32025146484375, 'policy_loss': -0.15737244486808777, 'vf_explained_var': 0.04494946449995041, 'entropy': 8.678549766540527, 'cur_lr': 4.999999873689376e-05, 'total_loss': 248.19451904296875, 'kl': 0.01389290764927864}, 'load_time_ms': 0.68, 'num_steps_sampled': 501600, 'update_time_ms': 2.604}",418,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.37141251564026,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,501600,501600,{},418,73,-87.07637775054621,2025-09-04_20-34-18,8.000046023517019,3651948,1757010858,-16.262294965535656,15683.509728908539,15048,15.88
+cda-server-2,False,15725.7066116333,"{'sample_time_ms': 40875.873, 'num_steps_trained': 502800, 'grad_time_ms': 372.738, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 340.843994140625, 'policy_loss': -0.1627647578716278, 'vf_explained_var': 0.050581760704517365, 'entropy': 8.839759826660156, 'cur_lr': 4.999999873689376e-05, 'total_loss': 340.71539306640625, 'kl': 0.014986970461905003}, 'load_time_ms': 0.677, 'num_steps_sampled': 502800, 'update_time_ms': 2.591}",419,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.19688272476196,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,502800,502800,{},419,70,-86.54102131912431,2025-09-04_20-35-01,8.000198525211543,3651948,1757010901,-17.980034185103467,15725.7066116333,15118,16.73
+cda-server-2,False,15766.780923604965,"{'sample_time_ms': 40849.658, 'num_steps_trained': 504000, 'grad_time_ms': 372.638, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 318.67681884765625, 'policy_loss': -0.16638629138469696, 'vf_explained_var': 0.026680052280426025, 'entropy': 8.905611038208008, 'cur_lr': 4.999999873689376e-05, 'total_loss': 318.5440368652344, 'kl': 0.014751172624528408}, 'load_time_ms': 0.667, 'num_steps_sampled': 504000, 'update_time_ms': 2.556}",420,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.07431197166443,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,504000,504000,{},420,69,-86.5520037264815,2025-09-04_20-35-42,8.000133491272962,3651948,1757010942,-19.51052381653333,15766.780923604965,15187,17.71
+cda-server-2,False,15808.38918685913,"{'sample_time_ms': 40897.437, 'num_steps_trained': 505200, 'grad_time_ms': 371.831, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 337.9524230957031, 'policy_loss': -0.16272571682929993, 'vf_explained_var': 0.03287976235151291, 'entropy': 8.645221710205078, 'cur_lr': 4.999999873689376e-05, 'total_loss': 337.8250427246094, 'kl': 0.015531342476606369}, 'load_time_ms': 0.664, 'num_steps_sampled': 505200, 'update_time_ms': 2.613}",421,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.60826325416565,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,505200,505200,{},421,61,-88.78650310319185,2025-09-04_20-36-23,8.000133491272962,3651948,1757010983,-21.506250790355175,15808.38918685913,15248,18.63
+cda-server-2,False,15849.308934688568,"{'sample_time_ms': 40906.205, 'num_steps_trained': 506400, 'grad_time_ms': 368.897, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 297.020751953125, 'policy_loss': -0.1772303581237793, 'vf_explained_var': 0.04066776484251022, 'entropy': 8.614079475402832, 'cur_lr': 4.999999873689376e-05, 'total_loss': 296.8775329589844, 'kl': 0.014923757873475552}, 'load_time_ms': 0.669, 'num_steps_sampled': 506400, 'update_time_ms': 2.616}",422,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.919747829437256,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,506400,506400,{},422,62,-88.78650310319185,2025-09-04_20-37-04,8.000122212751483,3651948,1757011024,-23.060168047597784,15849.308934688568,15310,19.56
+cda-server-2,False,15890.32418012619,"{'sample_time_ms': 40896.764, 'num_steps_trained': 507600, 'grad_time_ms': 368.83, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 385.34967041015625, 'policy_loss': -0.1683931201696396, 'vf_explained_var': 0.02904464863240719, 'entropy': 8.951404571533203, 'cur_lr': 4.999999873689376e-05, 'total_loss': 385.2170715332031, 'kl': 0.01571129448711872}, 'load_time_ms': 0.665, 'num_steps_sampled': 507600, 'update_time_ms': 2.615}",423,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.01524543762207,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,507600,507600,{},423,53,-88.4753479922558,2025-09-04_20-37-45,8.000303363649481,3651948,1757011065,-25.800704915984554,15890.32418012619,15363,21.06
+cda-server-2,False,15933.161979436874,"{'sample_time_ms': 41049.649, 'num_steps_trained': 508800, 'grad_time_ms': 370.584, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 249.62008666992188, 'policy_loss': -0.16717633605003357, 'vf_explained_var': 0.06472889333963394, 'entropy': 9.280853271484375, 'cur_lr': 4.999999873689376e-05, 'total_loss': 249.48416137695312, 'kl': 0.013700157403945923}, 'load_time_ms': 0.674, 'num_steps_sampled': 508800, 'update_time_ms': 2.585}",424,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.837799310684204,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,508800,508800,{},424,62,-88.4753479922558,2025-09-04_20-38-28,8.000303363649481,3651948,1757011108,-25.162971277519446,15933.161979436874,15425,20.74
+cda-server-2,False,15974.887422084808,"{'sample_time_ms': 41137.57, 'num_steps_trained': 510000, 'grad_time_ms': 372.385, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 278.6687927246094, 'policy_loss': -0.15017859637737274, 'vf_explained_var': 0.04893035441637039, 'entropy': 8.757755279541016, 'cur_lr': 4.999999873689376e-05, 'total_loss': 278.5521240234375, 'kl': 0.014712914824485779}, 'load_time_ms': 0.682, 'num_steps_sampled': 510000, 'update_time_ms': 2.603}",425,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.72544264793396,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,510000,510000,{},425,78,-87.41739124364128,2025-09-04_20-39-10,8.000000576653324,3651948,1757011150,-17.38091832113621,15974.887422084808,15503,16.53
+cda-server-2,False,16016.248041629791,"{'sample_time_ms': 41186.426, 'num_steps_trained': 511200, 'grad_time_ms': 371.474, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 367.4398193359375, 'policy_loss': -0.16482672095298767, 'vf_explained_var': 0.02282983809709549, 'entropy': 9.066640853881836, 'cur_lr': 4.999999873689376e-05, 'total_loss': 367.3052978515625, 'kl': 0.013300522230565548}, 'load_time_ms': 0.696, 'num_steps_sampled': 511200, 'update_time_ms': 2.595}",426,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.36061954498291,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,511200,511200,{},426,59,-87.36783953007203,2025-09-04_20-39-51,8.000372268868835,3651948,1757011191,-21.251164770190165,16016.248041629791,15562,18.51
+cda-server-2,False,16057.148822069168,"{'sample_time_ms': 41123.02, 'num_steps_trained': 512400, 'grad_time_ms': 370.033, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 384.1492614746094, 'policy_loss': -0.1670141965150833, 'vf_explained_var': 0.04643003270030022, 'entropy': 8.743922233581543, 'cur_lr': 4.999999873689376e-05, 'total_loss': 384.01422119140625, 'kl': 0.014027304016053677}, 'load_time_ms': 0.694, 'num_steps_sampled': 512400, 'update_time_ms': 2.58}",427,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.90078043937683,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,512400,512400,{},427,64,-87.7358976901057,2025-09-04_20-40-32,8.000372268868835,3651948,1757011232,-21.298042743251134,16057.148822069168,15626,18.39
+cda-server-2,False,16098.29258608818,"{'sample_time_ms': 41100.097, 'num_steps_trained': 513600, 'grad_time_ms': 370.204, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 315.5397644042969, 'policy_loss': -0.16934019327163696, 'vf_explained_var': 0.03011532686650753, 'entropy': 8.76919937133789, 'cur_lr': 4.999999873689376e-05, 'total_loss': 315.4030456542969, 'kl': 0.014322774484753609}, 'load_time_ms': 0.686, 'num_steps_sampled': 513600, 'update_time_ms': 2.58}",428,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.14376401901245,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,513600,513600,{},428,71,-87.7358976901057,2025-09-04_20-41-13,8.000580944936152,3651948,1757011273,-19.053094048889122,16098.29258608818,15697,17.31
+cda-server-2,False,16140.245764255524,"{'sample_time_ms': 41074.145, 'num_steps_trained': 514800, 'grad_time_ms': 371.714, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 346.0904541015625, 'policy_loss': -0.15643729269504547, 'vf_explained_var': 0.027360280975699425, 'entropy': 8.556154251098633, 'cur_lr': 4.999999873689376e-05, 'total_loss': 345.9683837890625, 'kl': 0.015092356130480766}, 'load_time_ms': 0.7, 'num_steps_sampled': 514800, 'update_time_ms': 2.579}",429,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.95317816734314,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,514800,514800,{},429,67,-89.88324149646371,2025-09-04_20-41-55,8.000002145866585,3651948,1757011315,-20.291947756535507,16140.245764255524,15764,17.8
+cda-server-2,False,16181.876400232315,"{'sample_time_ms': 41132.037, 'num_steps_trained': 516000, 'grad_time_ms': 369.429, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 313.5926513671875, 'policy_loss': -0.15500952303409576, 'vf_explained_var': 0.03515625, 'entropy': 8.963751792907715, 'cur_lr': 4.999999873689376e-05, 'total_loss': 313.47021484375, 'kl': 0.014299273490905762}, 'load_time_ms': 0.706, 'num_steps_sampled': 516000, 'update_time_ms': 2.605}",430,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.63063597679138,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,516000,516000,{},430,62,-89.21387403888859,2025-09-04_20-42-37,8.000025486195257,3651948,1757011357,-21.170838996596707,16181.876400232315,15826,18.47
+cda-server-2,False,16223.264105081558,"{'sample_time_ms': 41107.196, 'num_steps_trained': 517200, 'grad_time_ms': 372.25, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 319.3018798828125, 'policy_loss': -0.1622197926044464, 'vf_explained_var': 0.036697857081890106, 'entropy': 8.870936393737793, 'cur_lr': 4.999999873689376e-05, 'total_loss': 319.16925048828125, 'kl': 0.0129969147965312}, 'load_time_ms': 0.721, 'num_steps_sampled': 517200, 'update_time_ms': 2.508}",431,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.387704849243164,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,517200,517200,{},431,71,-88.88322066238273,2025-09-04_20-43-19,8.00021796775948,3651948,1757011399,-19.235878452041984,16223.264105081558,15897,17.37
+cda-server-2,False,16264.251901388168,"{'sample_time_ms': 41113.298, 'num_steps_trained': 518400, 'grad_time_ms': 372.889, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 316.2972412109375, 'policy_loss': -0.15378178656101227, 'vf_explained_var': 0.04921703040599823, 'entropy': 8.469070434570312, 'cur_lr': 4.999999873689376e-05, 'total_loss': 316.1748962402344, 'kl': 0.013782855123281479}, 'load_time_ms': 0.73, 'num_steps_sampled': 518400, 'update_time_ms': 2.522}",432,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.98779630661011,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,518400,518400,{},432,68,-86.84082784043173,2025-09-04_20-44-00,8.00034664042358,3651948,1757011440,-19.35597032575904,16264.251901388168,15965,17.35
+cda-server-2,False,16305.171558618546,"{'sample_time_ms': 41105.105, 'num_steps_trained': 519600, 'grad_time_ms': 371.462, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 314.1938781738281, 'policy_loss': -0.15594321489334106, 'vf_explained_var': 0.027978135272860527, 'entropy': 8.679997444152832, 'cur_lr': 4.999999873689376e-05, 'total_loss': 314.0681457519531, 'kl': 0.013278153724968433}, 'load_time_ms': 0.732, 'num_steps_sampled': 519600, 'update_time_ms': 2.51}",433,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.9196572303772,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,519600,519600,{},433,71,-88.80748243867461,2025-09-04_20-44-40,8.001276994407831,3651948,1757011480,-18.51521611093584,16305.171558618546,16036,16.79
+cda-server-2,False,16346.109334468842,"{'sample_time_ms': 40915.475, 'num_steps_trained': 520800, 'grad_time_ms': 370.989, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 338.2723693847656, 'policy_loss': -0.15147621929645538, 'vf_explained_var': 0.044524677097797394, 'entropy': 8.881685256958008, 'cur_lr': 4.999999873689376e-05, 'total_loss': 338.156005859375, 'kl': 0.015402843244373798}, 'load_time_ms': 0.729, 'num_steps_sampled': 520800, 'update_time_ms': 2.55}",434,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.93777585029602,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,520800,520800,{},434,65,-88.64775466310672,2025-09-04_20-45-21,8.001276994407831,3651948,1757011521,-19.979875043752536,16346.109334468842,16101,17.69
+cda-server-2,False,16388.005512714386,"{'sample_time_ms': 40933.02, 'num_steps_trained': 522000, 'grad_time_ms': 370.457, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 309.99200439453125, 'policy_loss': -0.15323799848556519, 'vf_explained_var': 0.04143669083714485, 'entropy': 8.597495079040527, 'cur_lr': 4.999999873689376e-05, 'total_loss': 309.87548828125, 'kl': 0.0161251500248909}, 'load_time_ms': 0.728, 'num_steps_sampled': 522000, 'update_time_ms': 2.593}",435,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.896178245544434,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,522000,522000,{},435,68,-86.39453445540397,2025-09-04_20-46-03,8.00002282974312,3651948,1757011563,-19.94524785610513,16388.005512714386,16169,17.9
+cda-server-2,False,16429.511291265488,"{'sample_time_ms': 40946.253, 'num_steps_trained': 523200, 'grad_time_ms': 371.756, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 342.5484619140625, 'policy_loss': -0.147806316614151, 'vf_explained_var': 0.028934823349118233, 'entropy': 9.265068054199219, 'cur_lr': 4.999999873689376e-05, 'total_loss': 342.4322204589844, 'kl': 0.013850619085133076}, 'load_time_ms': 0.709, 'num_steps_sampled': 523200, 'update_time_ms': 2.604}",436,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.505778551101685,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,523200,523200,{},436,72,-86.41977100309569,2025-09-04_20-46-45,8.000033196464619,3651948,1757011605,-19.240552723068152,16429.511291265488,16241,17.43
+cda-server-2,False,16470.664858818054,"{'sample_time_ms': 40971.658, 'num_steps_trained': 524400, 'grad_time_ms': 371.669, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 364.4984436035156, 'policy_loss': -0.16167707741260529, 'vf_explained_var': 0.045816823840141296, 'entropy': 8.512593269348145, 'cur_lr': 4.999999873689376e-05, 'total_loss': 364.37127685546875, 'kl': 0.015125438570976257}, 'load_time_ms': 0.706, 'num_steps_sampled': 524400, 'update_time_ms': 2.595}",437,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.15356755256653,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,524400,524400,{},437,61,-86.41977100309569,2025-09-04_20-47-26,8.000000835511225,3651948,1757011646,-21.402012256572934,16470.664858818054,16302,18.45
+cda-server-2,False,16511.818156003952,"{'sample_time_ms': 40970.266, 'num_steps_trained': 525600, 'grad_time_ms': 373.921, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 385.24468994140625, 'policy_loss': -0.1535537838935852, 'vf_explained_var': 0.035642359405756, 'entropy': 8.525103569030762, 'cur_lr': 4.999999873689376e-05, 'total_loss': 385.1243591308594, 'kl': 0.014598245732486248}, 'load_time_ms': 0.726, 'num_steps_sampled': 525600, 'update_time_ms': 2.596}",438,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.15329718589783,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,525600,525600,{},438,68,-89.89362692541498,2025-09-04_20-48-07,8.000018341866456,3651948,1757011687,-20.951401441454294,16511.818156003952,16370,18.2
+cda-server-2,False,16553.65321779251,"{'sample_time_ms': 40959.44, 'num_steps_trained': 526800, 'grad_time_ms': 372.979, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 355.47296142578125, 'policy_loss': -0.1548500955104828, 'vf_explained_var': 0.035611316561698914, 'entropy': 8.34416675567627, 'cur_lr': 4.999999873689376e-05, 'total_loss': 355.3534851074219, 'kl': 0.01554470881819725}, 'load_time_ms': 0.713, 'num_steps_sampled': 526800, 'update_time_ms': 2.635}",439,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.83506178855896,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,526800,526800,{},439,76,-87.30425045952317,2025-09-04_20-48-49,8.000062272518257,3651948,1757011729,-17.817107062266913,16553.65321779251,16446,16.55
+cda-server-2,False,16595.434185028076,"{'sample_time_ms': 40973.372, 'num_steps_trained': 528000, 'grad_time_ms': 374.091, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 414.3402099609375, 'policy_loss': -0.1643889993429184, 'vf_explained_var': 0.03891804441809654, 'entropy': 8.819328308105469, 'cur_lr': 4.999999873689376e-05, 'total_loss': 414.2076110839844, 'kl': 0.013955799862742424}, 'load_time_ms': 0.714, 'num_steps_sampled': 528000, 'update_time_ms': 2.621}",440,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.780967235565186,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,528000,528000,{},440,73,-87.03746228171902,2025-09-04_20-49-31,8.000193163737467,3651948,1757011771,-16.514277600802984,16595.434185028076,16519,15.74
+cda-server-2,False,16637.182630062103,"{'sample_time_ms': 41010.094, 'num_steps_trained': 529200, 'grad_time_ms': 373.472, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 281.71002197265625, 'policy_loss': -0.15865615010261536, 'vf_explained_var': 0.01832013577222824, 'entropy': 8.66702651977539, 'cur_lr': 4.999999873689376e-05, 'total_loss': 281.5833435058594, 'kl': 0.014038166962563992}, 'load_time_ms': 0.702, 'num_steps_sampled': 529200, 'update_time_ms': 2.614}",441,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.7484450340271,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,529200,529200,{},441,77,-87.03746228171902,2025-09-04_20-50-13,8.000000929489092,3651948,1757011813,-18.099141394017842,16637.182630062103,16596,16.63
+cda-server-2,False,16679.03945326805,"{'sample_time_ms': 41097.119, 'num_steps_trained': 530400, 'grad_time_ms': 373.397, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 379.4559020996094, 'policy_loss': -0.15498653054237366, 'vf_explained_var': 0.029985547065734863, 'entropy': 8.676912307739258, 'cur_lr': 4.999999873689376e-05, 'total_loss': 379.3329162597656, 'kl': 0.014052795246243477}, 'load_time_ms': 0.691, 'num_steps_sampled': 530400, 'update_time_ms': 2.593}",442,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.856823205947876,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,530400,530400,{},442,69,-87.8607226904853,2025-09-04_20-50-55,8.000010497723688,3651948,1757011855,-19.481709087181883,16679.03945326805,16665,17.45
+cda-server-2,False,16720.618898153305,"{'sample_time_ms': 41161.866, 'num_steps_trained': 531600, 'grad_time_ms': 374.636, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 320.0054626464844, 'policy_loss': -0.16083712875843048, 'vf_explained_var': 0.04631289467215538, 'entropy': 8.030533790588379, 'cur_lr': 4.999999873689376e-05, 'total_loss': 319.877197265625, 'kl': 0.014289619401097298}, 'load_time_ms': 0.69, 'num_steps_sampled': 531600, 'update_time_ms': 2.602}",443,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.579444885253906,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,531600,531600,{},443,83,-87.61995030853359,2025-09-04_20-51-36,8.000301482042897,3651948,1757011896,-12.899752233581278,16720.618898153305,16748,13.8
+cda-server-2,False,16762.03003191948,"{'sample_time_ms': 41210.451, 'num_steps_trained': 532800, 'grad_time_ms': 373.476, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 334.7008056640625, 'policy_loss': -0.1596754640340805, 'vf_explained_var': 0.04282053932547569, 'entropy': 8.831731796264648, 'cur_lr': 4.999999873689376e-05, 'total_loss': 334.5743408203125, 'kl': 0.01455344632267952}, 'load_time_ms': 0.686, 'num_steps_sampled': 532800, 'update_time_ms': 2.594}",444,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.411133766174316,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,532800,532800,{},444,64,-88.81839473896785,2025-09-04_20-52-18,8.000016245196393,3651948,1757011938,-18.987936818539836,16762.03003191948,16812,17.3
+cda-server-2,False,16804.291570425034,"{'sample_time_ms': 41247.39, 'num_steps_trained': 534000, 'grad_time_ms': 373.179, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 291.1845397949219, 'policy_loss': -0.16169238090515137, 'vf_explained_var': 0.0301960501819849, 'entropy': 8.103074073791504, 'cur_lr': 4.999999873689376e-05, 'total_loss': 291.0556945800781, 'kl': 0.01442566979676485}, 'load_time_ms': 0.673, 'num_steps_sampled': 534000, 'update_time_ms': 2.527}",445,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.2615385055542,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,534000,534000,{},445,79,-85.35315981367188,2025-09-04_20-53-00,8.000084072096763,3651948,1757011980,-14.920949637008876,16804.291570425034,16891,14.98
+cda-server-2,False,16845.300713777542,"{'sample_time_ms': 41199.007, 'num_steps_trained': 535200, 'grad_time_ms': 371.881, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 338.442138671875, 'policy_loss': -0.15294824540615082, 'vf_explained_var': 0.03564156964421272, 'entropy': 8.482898712158203, 'cur_lr': 4.999999873689376e-05, 'total_loss': 338.3228454589844, 'kl': 0.014768613502383232}, 'load_time_ms': 0.68, 'num_steps_sampled': 535200, 'update_time_ms': 2.499}",446,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.009143352508545,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,535200,535200,{},446,58,-88.5915819217999,2025-09-04_20-53-41,8.00002514491476,3651948,1757012021,-19.8870777818862,16845.300713777542,16949,17.73
+cda-server-2,False,16886.91087770462,"{'sample_time_ms': 41244.008, 'num_steps_trained': 536400, 'grad_time_ms': 372.521, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 175.49859619140625, 'policy_loss': -0.15918566286563873, 'vf_explained_var': 0.0374857522547245, 'entropy': 8.244630813598633, 'cur_lr': 4.999999873689376e-05, 'total_loss': 175.37261962890625, 'kl': 0.01456509530544281}, 'load_time_ms': 0.681, 'num_steps_sampled': 536400, 'update_time_ms': 2.53}",447,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.61016392707825,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,536400,536400,{},447,85,-82.83504604852666,2025-09-04_20-54-22,8.000089276407737,3651948,1757012062,-14.488990583660609,16886.91087770462,17034,14.77
+cda-server-2,False,16927.87378692627,"{'sample_time_ms': 41226.985, 'num_steps_trained': 537600, 'grad_time_ms': 370.566, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 284.6360778808594, 'policy_loss': -0.15949472784996033, 'vf_explained_var': 0.03629742190241814, 'entropy': 8.63432502746582, 'cur_lr': 4.999999873689376e-05, 'total_loss': 284.5114440917969, 'kl': 0.015297316946089268}, 'load_time_ms': 0.664, 'num_steps_sampled': 537600, 'update_time_ms': 2.485}",448,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",40.96290922164917,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,537600,537600,{},448,73,-87.3940341602237,2025-09-04_20-55-03,8.000302562516929,3651948,1757012103,-14.556468628799866,16927.87378692627,17107,14.87
+cda-server-2,False,16969.17755842209,"{'sample_time_ms': 41174.344, 'num_steps_trained': 538800, 'grad_time_ms': 370.061, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 231.8226776123047, 'policy_loss': -0.1713278889656067, 'vf_explained_var': 0.048882465809583664, 'entropy': 8.455493927001953, 'cur_lr': 4.999999873689376e-05, 'total_loss': 231.6845703125, 'kl': 0.014588426798582077}, 'load_time_ms': 0.662, 'num_steps_sampled': 538800, 'update_time_ms': 2.474}",449,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.30377149581909,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,538800,538800,{},449,77,-71.82392088658297,2025-09-04_20-55-45,8.000001172878449,3651948,1757012145,-15.705366250989869,16969.17755842209,17184,15.75
+cda-server-2,False,17010.63756752014,"{'sample_time_ms': 41142.753, 'num_steps_trained': 540000, 'grad_time_ms': 369.542, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 281.6278076171875, 'policy_loss': -0.1522459238767624, 'vf_explained_var': 0.05002821236848831, 'entropy': 8.116762161254883, 'cur_lr': 4.999999873689376e-05, 'total_loss': 281.5114440917969, 'kl': 0.01575664058327675}, 'load_time_ms': 0.667, 'num_steps_sampled': 540000, 'update_time_ms': 2.485}",450,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.46000909805298,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,540000,540000,{},450,80,-84.15010302375366,2025-09-04_20-56-26,8.00005827544259,3651948,1757012186,-14.93996309152615,17010.63756752014,17264,15.09
+cda-server-2,False,17051.653188228607,"{'sample_time_ms': 41071.39, 'num_steps_trained': 541200, 'grad_time_ms': 367.579, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 319.8498229980469, 'policy_loss': -0.1568194031715393, 'vf_explained_var': 0.018670465797185898, 'entropy': 8.665520668029785, 'cur_lr': 4.999999873689376e-05, 'total_loss': 319.7239990234375, 'kl': 0.013618793338537216}, 'load_time_ms': 0.661, 'num_steps_sampled': 541200, 'update_time_ms': 2.58}",451,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.015620708465576,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,541200,541200,{},451,69,-87.26869020953562,2025-09-04_20-57-07,8.000050737390461,3651948,1757012227,-19.006085120852394,17051.653188228607,17333,17.34
+cda-server-2,False,17092.9470539093,"{'sample_time_ms': 41014.507, 'num_steps_trained': 542400, 'grad_time_ms': 368.17, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 288.7252502441406, 'policy_loss': -0.1631011813879013, 'vf_explained_var': 0.029342809692025185, 'entropy': 8.429981231689453, 'cur_lr': 4.999999873689376e-05, 'total_loss': 288.5946044921875, 'kl': 0.014258328825235367}, 'load_time_ms': 0.66, 'num_steps_sampled': 542400, 'update_time_ms': 2.56}",452,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.29386568069458,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,542400,542400,{},452,74,-87.34813542686608,2025-09-04_20-57-49,8.000189292670523,3651948,1757012269,-18.18275225378042,17092.9470539093,17407,16.83
+cda-server-2,False,17134.125964164734,"{'sample_time_ms': 40974.083, 'num_steps_trained': 543600, 'grad_time_ms': 368.54, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 394.4097900390625, 'policy_loss': -0.16569074988365173, 'vf_explained_var': 0.03131605684757233, 'entropy': 8.512945175170898, 'cur_lr': 4.999999873689376e-05, 'total_loss': 394.2773742675781, 'kl': 0.014615214429795742}, 'load_time_ms': 0.664, 'num_steps_sampled': 543600, 'update_time_ms': 2.614}",453,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.17891025543213,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,543600,543600,{},453,64,-90.00949107692566,2025-09-04_20-58-30,8.000245652836771,3651948,1757012310,-19.285058586526443,17134.125964164734,17471,17.48
+cda-server-2,False,17176.44965982437,"{'sample_time_ms': 41062.823, 'num_steps_trained': 544800, 'grad_time_ms': 371.057, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 358.07147216796875, 'policy_loss': -0.147636279463768, 'vf_explained_var': 0.03243735060095787, 'entropy': 8.070189476013184, 'cur_lr': 4.999999873689376e-05, 'total_loss': 357.9606628417969, 'kl': 0.01616663858294487}, 'load_time_ms': 0.675, 'num_steps_sampled': 544800, 'update_time_ms': 2.586}",454,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.32369565963745,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,544800,544800,{},454,84,-90.00949107692566,2025-09-04_20-59-12,8.000010170167442,3651948,1757012352,-15.418296576200568,17176.44965982437,17555,15.15
+cda-server-2,False,17218.774721622467,"{'sample_time_ms': 41068.327, 'num_steps_trained': 546000, 'grad_time_ms': 371.816, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 274.8625183105469, 'policy_loss': -0.15433499217033386, 'vf_explained_var': 0.04063411429524422, 'entropy': 8.161273956298828, 'cur_lr': 4.999999873689376e-05, 'total_loss': 274.7413330078125, 'kl': 0.014555818401277065}, 'load_time_ms': 0.691, 'num_steps_sampled': 546000, 'update_time_ms': 2.608}",455,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.3250617980957,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,546000,546000,{},455,78,-87.03464169534483,2025-09-04_20-59-55,8.001451916235133,3651948,1757012395,-14.942562569436975,17218.774721622467,17633,14.96
+cda-server-2,False,17260.553253889084,"{'sample_time_ms': 41144.626, 'num_steps_trained': 547200, 'grad_time_ms': 372.396, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 259.0595703125, 'policy_loss': -0.15590450167655945, 'vf_explained_var': 0.05277172848582268, 'entropy': 7.884790897369385, 'cur_lr': 4.999999873689376e-05, 'total_loss': 258.9333801269531, 'kl': 0.013034400530159473}, 'load_time_ms': 0.695, 'num_steps_sampled': 547200, 'update_time_ms': 2.671}",456,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.77853226661682,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,547200,547200,{},456,90,-88.63748942824976,2025-09-04_21-00-36,8.001124234118306,3651948,1757012436,-12.186383781204604,17260.553253889084,17723,13.37
+cda-server-2,False,17303.146927833557,"{'sample_time_ms': 41242.352, 'num_steps_trained': 548400, 'grad_time_ms': 372.992, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 234.77056884765625, 'policy_loss': -0.1650943160057068, 'vf_explained_var': 0.03529277816414833, 'entropy': 8.531253814697266, 'cur_lr': 4.999999873689376e-05, 'total_loss': 234.63693237304688, 'kl': 0.013813511468470097}, 'load_time_ms': 0.706, 'num_steps_sampled': 548400, 'update_time_ms': 2.666}",457,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.59367394447327,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,548400,548400,{},457,88,-84.94318505630861,2025-09-04_21-01-19,8.000029127239786,3651948,1757012479,-11.773299884421103,17303.146927833557,17811,13.41
+cda-server-2,False,17344.81735086441,"{'sample_time_ms': 41312.334, 'num_steps_trained': 549600, 'grad_time_ms': 373.744, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 226.6254425048828, 'policy_loss': -0.16493502259254456, 'vf_explained_var': 0.042032089084386826, 'entropy': 8.069854736328125, 'cur_lr': 4.999999873689376e-05, 'total_loss': 226.49737548828125, 'kl': 0.016184350475668907}, 'load_time_ms': 0.701, 'num_steps_sampled': 549600, 'update_time_ms': 2.704}",458,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.67042303085327,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,549600,549600,{},458,70,-87.02378533047025,2025-09-04_21-02-01,8.000029423041246,3651948,1757012521,-17.5357891026734,17344.81735086441,17881,16.62
+cda-server-2,False,17384.00404715538,"{'sample_time_ms': 41099.307, 'num_steps_trained': 550800, 'grad_time_ms': 375.05, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 232.12977600097656, 'policy_loss': -0.14683347940444946, 'vf_explained_var': 0.03351776301860809, 'entropy': 8.242464065551758, 'cur_lr': 4.999999873689376e-05, 'total_loss': 232.02127075195312, 'kl': 0.016831597313284874}, 'load_time_ms': 0.708, 'num_steps_sampled': 550800, 'update_time_ms': 2.729}",459,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.18669629096985,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,550800,550800,{},459,87,-87.02378533047025,2025-09-04_21-02-40,8.000038132613607,3651948,1757012560,-13.801652622181992,17384.00404715538,17968,14.44
+cda-server-2,False,17423.686593294144,"{'sample_time_ms': 40922.125, 'num_steps_trained': 552000, 'grad_time_ms': 374.485, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 234.0694122314453, 'policy_loss': -0.15866075456142426, 'vf_explained_var': 0.035337552428245544, 'entropy': 8.417089462280273, 'cur_lr': 4.999999873689376e-05, 'total_loss': 233.94387817382812, 'kl': 0.01453636959195137}, 'load_time_ms': 0.704, 'num_steps_sampled': 552000, 'update_time_ms': 2.716}",460,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",39.68254613876343,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,552000,552000,{},460,77,-83.88185249257836,2025-09-04_21-03-20,8.000636031322111,3651948,1757012600,-14.473103590357972,17423.686593294144,18045,14.93
+cda-server-2,False,17464.750234603882,"{'sample_time_ms': 40923.764, 'num_steps_trained': 553200, 'grad_time_ms': 377.604, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 301.85015869140625, 'policy_loss': -0.17026448249816895, 'vf_explained_var': 0.026150895282626152, 'entropy': 8.212603569030762, 'cur_lr': 4.999999873689376e-05, 'total_loss': 301.7120361328125, 'kl': 0.014113317243754864}, 'load_time_ms': 0.719, 'num_steps_sampled': 553200, 'update_time_ms': 2.657}",461,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.06364130973816,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,553200,553200,{},461,72,-86.74672593739993,2025-09-04_21-04-01,8.000563786902937,3651948,1757012641,-18.313983170418428,17464.750234603882,18117,17.1
+cda-server-2,False,17506.31477212906,"{'sample_time_ms': 40951.639, 'num_steps_trained': 554400, 'grad_time_ms': 376.803, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 244.6202850341797, 'policy_loss': -0.1634778082370758, 'vf_explained_var': 0.07023818045854568, 'entropy': 8.577519416809082, 'cur_lr': 4.999999873689376e-05, 'total_loss': 244.48876953125, 'kl': 0.014043360948562622}, 'load_time_ms': 0.715, 'num_steps_sampled': 554400, 'update_time_ms': 2.705}",462,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.564537525177,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,554400,554400,{},462,76,-87.00423157440254,2025-09-04_21-04-42,8.00011506562049,3651948,1757012682,-15.492103045760286,17506.31477212906,18193,15.58
+cda-server-2,False,17547.55945444107,"{'sample_time_ms': 40959.104, 'num_steps_trained': 555600, 'grad_time_ms': 375.928, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 257.6145935058594, 'policy_loss': -0.15277798473834991, 'vf_explained_var': 0.05817045271396637, 'entropy': 8.55958366394043, 'cur_lr': 4.999999873689376e-05, 'total_loss': 257.4984130859375, 'kl': 0.01608334667980671}, 'load_time_ms': 0.715, 'num_steps_sampled': 555600, 'update_time_ms': 2.65}",463,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.24468231201172,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,555600,555600,{},463,73,-87.47294627550268,2025-09-04_21-05-24,8.000169021854774,3651948,1757012724,-16.830785683922095,17547.55945444107,18266,16.2
+cda-server-2,False,17589.918608427048,"{'sample_time_ms': 40961.879, 'num_steps_trained': 556800, 'grad_time_ms': 376.652, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 211.51295471191406, 'policy_loss': -0.1506921648979187, 'vf_explained_var': 0.04846331849694252, 'entropy': 8.07451343536377, 'cur_lr': 4.999999873689376e-05, 'total_loss': 211.39886474609375, 'kl': 0.016063140705227852}, 'load_time_ms': 0.724, 'num_steps_sampled': 556800, 'update_time_ms': 2.646}",464,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.35915398597717,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,556800,556800,{},464,96,-84.9705695047172,2025-09-04_21-06-06,8.00009345027309,3651948,1757012766,-10.908602615462316,17589.918608427048,18362,12.85
+cda-server-2,False,17631.50919151306,"{'sample_time_ms': 40890.134, 'num_steps_trained': 558000, 'grad_time_ms': 374.93, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 307.7181091308594, 'policy_loss': -0.1529518961906433, 'vf_explained_var': 0.03323771059513092, 'entropy': 8.194103240966797, 'cur_lr': 4.999999873689376e-05, 'total_loss': 307.5993347167969, 'kl': 0.014989580027759075}, 'load_time_ms': 0.714, 'num_steps_sampled': 558000, 'update_time_ms': 2.64}",465,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.590583086013794,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,558000,558000,{},465,72,-85.39355564165294,2025-09-04_21-06-47,8.000000981212601,3651948,1757012807,-16.09407457830146,17631.50919151306,18434,15.67
+cda-server-2,False,17672.830092430115,"{'sample_time_ms': 40845.424, 'num_steps_trained': 559200, 'grad_time_ms': 373.882, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 324.7706298828125, 'policy_loss': -0.14864295721054077, 'vf_explained_var': 0.07129890471696854, 'entropy': 8.121540069580078, 'cur_lr': 4.999999873689376e-05, 'total_loss': 324.65435791015625, 'kl': 0.014201385900378227}, 'load_time_ms': 0.734, 'num_steps_sampled': 559200, 'update_time_ms': 2.645}",466,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.32090091705322,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,559200,559200,{},466,72,-86.69817533466964,2025-09-04_21-07-29,8.000000981212601,3651948,1757012849,-17.88563546355931,17672.830092430115,18506,16.68
+cda-server-2,False,17714.86853003502,"{'sample_time_ms': 40790.17, 'num_steps_trained': 560400, 'grad_time_ms': 373.663, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 305.15966796875, 'policy_loss': -0.14870496094226837, 'vf_explained_var': 0.050349798053503036, 'entropy': 8.24783992767334, 'cur_lr': 4.999999873689376e-05, 'total_loss': 305.0430908203125, 'kl': 0.014107043854892254}, 'load_time_ms': 0.727, 'num_steps_sampled': 560400, 'update_time_ms': 2.617}",467,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.038437604904175,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,560400,560400,{},467,73,-87.01498676038369,2025-09-04_21-08-11,8.000242708047294,3651948,1757012891,-16.99994393394948,17714.86853003502,18579,16.14
+cda-server-2,False,17757.885385751724,"{'sample_time_ms': 40925.756, 'num_steps_trained': 561600, 'grad_time_ms': 372.726, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 267.5435791015625, 'policy_loss': -0.15850119292736053, 'vf_explained_var': 0.04324857518076897, 'entropy': 8.296599388122559, 'cur_lr': 4.999999873689376e-05, 'total_loss': 267.42022705078125, 'kl': 0.015434009954333305}, 'load_time_ms': 0.741, 'num_steps_sampled': 561600, 'update_time_ms': 2.627}",468,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.01685571670532,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,561600,561600,{},468,76,-84.67355768924301,2025-09-04_21-08-54,8.000242708047294,3651948,1757012934,-17.36202630460213,17757.885385751724,18655,16.49
+cda-server-2,False,17800.227256536484,"{'sample_time_ms': 41243.709, 'num_steps_trained': 562800, 'grad_time_ms': 370.323, 'default': {'cur_kl_coeff': 2.278125047683716, 'vf_loss': 196.42506408691406, 'policy_loss': -0.1404145359992981, 'vf_explained_var': 0.0622972697019577, 'entropy': 8.130264282226562, 'cur_lr': 4.999999873689376e-05, 'total_loss': 196.33262634277344, 'kl': 0.021073700860142708}, 'load_time_ms': 0.741, 'num_steps_sampled': 562800, 'update_time_ms': 2.608}",469,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.34187078475952,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,562800,562800,{},469,82,-83.35466161554041,2025-09-04_21-09-36,8.000000400449515,3651948,1757012976,-14.09392057265725,17800.227256536484,18737,14.71
+cda-server-2,False,17842.053878068924,"{'sample_time_ms': 41456.792, 'num_steps_trained': 564000, 'grad_time_ms': 371.673, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 225.6439208984375, 'policy_loss': -0.1375613808631897, 'vf_explained_var': 0.08121463656425476, 'entropy': 8.127840042114258, 'cur_lr': 4.999999873689376e-05, 'total_loss': 225.542724609375, 'kl': 0.010645460337400436}, 'load_time_ms': 0.74, 'num_steps_sampled': 564000, 'update_time_ms': 2.589}",470,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.826621532440186,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,564000,564000,{},470,84,-87.60770839305536,2025-09-04_21-10-18,8.000000406113983,3651948,1757013018,-12.47544421668838,17842.053878068924,18821,13.76
+cda-server-2,False,17883.36307120323,"{'sample_time_ms': 41484.563, 'num_steps_trained': 565200, 'grad_time_ms': 368.478, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 395.8850402832031, 'policy_loss': -0.1417011171579361, 'vf_explained_var': 0.0316615104675293, 'entropy': 8.629416465759277, 'cur_lr': 4.999999873689376e-05, 'total_loss': 395.7802429199219, 'kl': 0.010803967714309692}, 'load_time_ms': 0.731, 'num_steps_sampled': 565200, 'update_time_ms': 2.635}",471,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.30919313430786,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,565200,565200,{},471,79,-87.47708512475148,2025-09-04_21-11-00,8.000576421701625,3651948,1757013060,-13.227012858516819,17883.36307120323,18900,14.02
+cda-server-2,False,17924.94619822502,"{'sample_time_ms': 41484.285, 'num_steps_trained': 566400, 'grad_time_ms': 370.58, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 314.4154968261719, 'policy_loss': -0.15358594059944153, 'vf_explained_var': 0.04043276980519295, 'entropy': 8.475257873535156, 'cur_lr': 4.999999873689376e-05, 'total_loss': 314.2979736328125, 'kl': 0.010557727888226509}, 'load_time_ms': 0.737, 'num_steps_sampled': 566400, 'update_time_ms': 2.607}",472,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.58312702178955,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,566400,566400,{},472,76,-87.13177188269114,2025-09-04_21-11-41,8.000055159235595,3651948,1757013101,-16.99260804384653,17924.94619822502,18976,16.12
+cda-server-2,False,17966.750247716904,"{'sample_time_ms': 41542.479, 'num_steps_trained': 567600, 'grad_time_ms': 368.348, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 273.8243103027344, 'policy_loss': -0.1353476345539093, 'vf_explained_var': 0.05185036361217499, 'entropy': 8.484976768493652, 'cur_lr': 4.999999873689376e-05, 'total_loss': 273.7279052734375, 'kl': 0.011385568417608738}, 'load_time_ms': 0.728, 'num_steps_sampled': 567600, 'update_time_ms': 2.625}",473,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.804049491882324,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,567600,567600,{},473,81,-86.55932765284689,2025-09-04_21-12-23,8.000000588705433,3651948,1757013143,-13.989362262801153,17966.750247716904,19057,14.53
+cda-server-2,False,18008.82496738434,"{'sample_time_ms': 41517.291, 'num_steps_trained': 568800, 'grad_time_ms': 365.168, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 256.0528564453125, 'policy_loss': -0.15473327040672302, 'vf_explained_var': 0.0509008951485157, 'entropy': 7.856842994689941, 'cur_lr': 4.999999873689376e-05, 'total_loss': 255.9336700439453, 'kl': 0.010404815897345543}, 'load_time_ms': 0.716, 'num_steps_sampled': 568800, 'update_time_ms': 2.63}",474,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.07471966743469,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,568800,568800,{},474,83,-81.52972203296628,2025-09-04_21-13-05,8.0002414412144,3651948,1757013185,-14.391012181483756,18008.82496738434,19140,14.68
+cda-server-2,False,18050.66978764534,"{'sample_time_ms': 41542.297, 'num_steps_trained': 570000, 'grad_time_ms': 365.636, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 308.62677001953125, 'policy_loss': -0.1460433006286621, 'vf_explained_var': 0.03346286341547966, 'entropy': 8.371639251708984, 'cur_lr': 4.999999873689376e-05, 'total_loss': 308.5193786621094, 'kl': 0.011313981376588345}, 'load_time_ms': 0.727, 'num_steps_sampled': 570000, 'update_time_ms': 2.634}",475,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.84482026100159,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,570000,570000,{},475,69,-85.97524171398528,2025-09-04_21-13-47,8.0002414412144,3651948,1757013227,-19.02973859829269,18050.66978764534,19209,17.31
+cda-server-2,False,18092.151755332947,"{'sample_time_ms': 41557.563, 'num_steps_trained': 571200, 'grad_time_ms': 366.533, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 319.2445068359375, 'policy_loss': -0.13714276254177094, 'vf_explained_var': 0.04726093262434006, 'entropy': 7.806724548339844, 'cur_lr': 4.999999873689376e-05, 'total_loss': 319.1560363769531, 'kl': 0.014237035065889359}, 'load_time_ms': 0.704, 'num_steps_sampled': 571200, 'update_time_ms': 2.595}",476,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.48196768760681,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,571200,571200,{},476,78,-87.78543487703158,2025-09-04_21-14-28,8.001416105329282,3651948,1757013268,-14.876747676421784,18092.151755332947,19287,14.96
+cda-server-2,False,18134.105527162552,"{'sample_time_ms': 41548.717, 'num_steps_trained': 572400, 'grad_time_ms': 366.91, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 217.2244110107422, 'policy_loss': -0.15130357444286346, 'vf_explained_var': 0.03335639461874962, 'entropy': 7.664278507232666, 'cur_lr': 4.999999873689376e-05, 'total_loss': 217.1077423095703, 'kl': 0.010144203901290894}, 'load_time_ms': 0.705, 'num_steps_sampled': 572400, 'update_time_ms': 2.613}",477,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.9537718296051,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,572400,572400,{},477,90,-85.62620526679551,2025-09-04_21-15-10,8.000000408650061,3651948,1757013310,-11.832322942400586,18134.105527162552,19377,13.26
+cda-server-2,False,18176.570024728775,"{'sample_time_ms': 41490.993, 'num_steps_trained': 573600, 'grad_time_ms': 369.408, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 240.44708251953125, 'policy_loss': -0.13824151456356049, 'vf_explained_var': 0.041959889233112335, 'entropy': 8.172922134399414, 'cur_lr': 4.999999873689376e-05, 'total_loss': 240.34580993652344, 'kl': 0.010818732902407646}, 'load_time_ms': 0.724, 'num_steps_sampled': 573600, 'update_time_ms': 2.572}",478,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.464497566223145,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,573600,573600,{},478,82,-86.89646622239142,2025-09-04_21-15-53,8.000167905287384,3651948,1757013353,-13.68312573920506,18176.570024728775,19459,14.43
+cda-server-2,False,18218.882836580276,"{'sample_time_ms': 41485.94, 'num_steps_trained': 574800, 'grad_time_ms': 371.547, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 191.9720916748047, 'policy_loss': -0.14333170652389526, 'vf_explained_var': 0.0873849019408226, 'entropy': 8.113423347473145, 'cur_lr': 4.999999873689376e-05, 'total_loss': 191.86610412597656, 'kl': 0.01093095913529396}, 'load_time_ms': 0.719, 'num_steps_sampled': 574800, 'update_time_ms': 2.558}",479,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.312811851501465,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,574800,574800,{},479,85,-69.91975954756731,2025-09-04_21-16-35,8.0024867008069,3651948,1757013395,-11.845564886456053,18218.882836580276,19544,13.47
+cda-server-2,False,18260.60901069641,"{'sample_time_ms': 41477.532, 'num_steps_trained': 576000, 'grad_time_ms': 369.917, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 255.5122833251953, 'policy_loss': -0.14694522321224213, 'vf_explained_var': 0.0370616652071476, 'entropy': 7.742955684661865, 'cur_lr': 4.999999873689376e-05, 'total_loss': 255.4047088623047, 'kl': 0.011521845124661922}, 'load_time_ms': 0.712, 'num_steps_sampled': 576000, 'update_time_ms': 2.582}",480,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.726174116134644,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,576000,576000,{},480,80,-69.38365356743621,2025-09-04_21-17-17,8.0024867008069,3651948,1757013437,-14.388228990457339,18260.60901069641,19624,14.88
+cda-server-2,False,18302.56862616539,"{'sample_time_ms': 41542.479, 'num_steps_trained': 577200, 'grad_time_ms': 369.968, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 239.53497314453125, 'policy_loss': -0.15021011233329773, 'vf_explained_var': 0.06270802021026611, 'entropy': 8.896788597106934, 'cur_lr': 4.999999873689376e-05, 'total_loss': 239.42184448242188, 'kl': 0.010851171799004078}, 'load_time_ms': 0.705, 'num_steps_sampled': 577200, 'update_time_ms': 2.632}",481,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.95961546897888,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,577200,577200,{},481,74,-83.22713405036208,2025-09-04_21-17-59,8.00002845257644,3651948,1757013479,-16.78376223473995,18302.56862616539,19698,16.22
+cda-server-2,False,18344.443053245544,"{'sample_time_ms': 41571.379, 'num_steps_trained': 578400, 'grad_time_ms': 370.209, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 243.20068359375, 'policy_loss': -0.15146000683307648, 'vf_explained_var': 0.07553044706583023, 'entropy': 8.30219554901123, 'cur_lr': 4.999999873689376e-05, 'total_loss': 243.08531188964844, 'kl': 0.010568010620772839}, 'load_time_ms': 0.715, 'num_steps_sampled': 578400, 'update_time_ms': 2.637}",482,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.87442708015442,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,578400,578400,{},482,82,-82.03602098541046,2025-09-04_21-18-41,8.000262047940932,3651948,1757013521,-13.913369093101737,18344.443053245544,19780,14.57
+cda-server-2,False,18387.00535440445,"{'sample_time_ms': 41644.671, 'num_steps_trained': 579600, 'grad_time_ms': 372.728, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 294.4358825683594, 'policy_loss': -0.14570266008377075, 'vf_explained_var': 0.04153982922434807, 'entropy': 8.038591384887695, 'cur_lr': 4.999999873689376e-05, 'total_loss': 294.3316650390625, 'kl': 0.012156561017036438}, 'load_time_ms': 0.714, 'num_steps_sampled': 579600, 'update_time_ms': 2.626}",483,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.56230115890503,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,579600,579600,{},483,86,-85.34388832913162,2025-09-04_21-19-23,8.000020010806644,3651948,1757013563,-13.433089765628749,18387.00535440445,19866,14.17
+cda-server-2,False,18428.3811314106,"{'sample_time_ms': 41574.855, 'num_steps_trained': 580800, 'grad_time_ms': 372.621, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 251.95838928222656, 'policy_loss': -0.140852153301239, 'vf_explained_var': 0.05595090612769127, 'entropy': 8.06338119506836, 'cur_lr': 4.999999873689376e-05, 'total_loss': 251.8585662841797, 'kl': 0.012006484903395176}, 'load_time_ms': 0.71, 'num_steps_sampled': 580800, 'update_time_ms': 2.641}",484,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.37577700614929,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,580800,580800,{},484,82,-85.5026981462562,2025-09-04_21-20-05,8.000003756576547,3651948,1757013605,-15.357466498791851,18428.3811314106,19948,15.33
+cda-server-2,False,18470.653317928314,"{'sample_time_ms': 41618.58, 'num_steps_trained': 582000, 'grad_time_ms': 371.639, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 226.7355194091797, 'policy_loss': -0.13351666927337646, 'vf_explained_var': 0.044881563633680344, 'entropy': 7.895392417907715, 'cur_lr': 4.999999873689376e-05, 'total_loss': 226.64395141601562, 'kl': 0.012266373261809349}, 'load_time_ms': 0.707, 'num_steps_sampled': 582000, 'update_time_ms': 2.634}",485,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.272186517715454,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,582000,582000,{},485,91,-85.5026981462562,2025-09-04_21-20-47,8.0001818373437,3651948,1757013647,-11.712298226184485,18470.653317928314,20039,13.24
+cda-server-2,False,18512.860308885574,"{'sample_time_ms': 41691.245, 'num_steps_trained': 583200, 'grad_time_ms': 371.496, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 177.91851806640625, 'policy_loss': -0.14544419944286346, 'vf_explained_var': 0.062186818569898605, 'entropy': 7.850541114807129, 'cur_lr': 4.999999873689376e-05, 'total_loss': 177.8248291015625, 'kl': 0.015139114111661911}, 'load_time_ms': 0.705, 'num_steps_sampled': 583200, 'update_time_ms': 2.619}",486,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.20699095726013,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,583200,583200,{},486,87,-72.88723044974441,2025-09-04_21-21-29,8.000000425461572,3651948,1757013689,-13.211649018999122,18512.860308885574,20126,14.18
+cda-server-2,False,18554.72178196907,"{'sample_time_ms': 41681.549, 'num_steps_trained': 584400, 'grad_time_ms': 371.942, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 220.20993041992188, 'policy_loss': -0.1493559181690216, 'vf_explained_var': 0.05542575567960739, 'entropy': 8.345026969909668, 'cur_lr': 4.999999873689376e-05, 'total_loss': 220.09539794921875, 'kl': 0.010188949294388294}, 'load_time_ms': 0.715, 'num_steps_sampled': 584400, 'update_time_ms': 2.579}",487,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.861473083496094,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,584400,584400,{},487,84,-85.36710238689737,2025-09-04_21-22-11,8.00017241267457,3651948,1757013731,-13.453084415477218,18554.72178196907,20210,14.39
+cda-server-2,False,18596.359052419662,"{'sample_time_ms': 41599.759, 'num_steps_trained': 585600, 'grad_time_ms': 370.993, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 154.37660217285156, 'policy_loss': -0.1398123949766159, 'vf_explained_var': 0.03538002073764801, 'entropy': 8.030010223388672, 'cur_lr': 4.999999873689376e-05, 'total_loss': 154.27552795410156, 'kl': 0.011330515146255493}, 'load_time_ms': 0.703, 'num_steps_sampled': 585600, 'update_time_ms': 2.596}",488,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.63727045059204,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,585600,585600,{},488,83,-58.349158582427094,2025-09-04_21-22-53,8.000038198750534,3651948,1757013773,-13.73840384895593,18596.359052419662,20293,14.61
+cda-server-2,False,18639.371886968613,"{'sample_time_ms': 41672.031, 'num_steps_trained': 586800, 'grad_time_ms': 368.754, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 298.9637145996094, 'policy_loss': -0.14437498152256012, 'vf_explained_var': 0.036520641297101974, 'entropy': 8.234903335571289, 'cur_lr': 4.999999873689376e-05, 'total_loss': 298.8560485839844, 'kl': 0.010744307190179825}, 'load_time_ms': 0.705, 'num_steps_sampled': 586800, 'update_time_ms': 2.59}",489,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.012834548950195,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,586800,586800,{},489,76,-83.90387052434679,2025-09-04_21-23-36,8.000121486043561,3651948,1757013816,-14.819772329091979,18639.371886968613,20369,15.26
+cda-server-2,False,18680.802632570267,"{'sample_time_ms': 41642.37, 'num_steps_trained': 588000, 'grad_time_ms': 368.84, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 270.6208190917969, 'policy_loss': -0.14302177727222443, 'vf_explained_var': 0.035059988498687744, 'entropy': 8.234389305114746, 'cur_lr': 4.999999873689376e-05, 'total_loss': 270.5148010253906, 'kl': 0.010817685164511204}, 'load_time_ms': 0.714, 'num_steps_sampled': 588000, 'update_time_ms': 2.569}",490,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.43074560165405,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,588000,588000,{},490,82,-88.57029168170826,2025-09-04_21-24-17,8.00024900433902,3651948,1757013857,-16.010917741917307,18680.802632570267,20451,15.75
+cda-server-2,False,18722.966340780258,"{'sample_time_ms': 41662.296, 'num_steps_trained': 589200, 'grad_time_ms': 369.404, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 308.23187255859375, 'policy_loss': -0.1413438767194748, 'vf_explained_var': 0.05475946143269539, 'entropy': 8.307570457458496, 'cur_lr': 4.999999873689376e-05, 'total_loss': 308.12884521484375, 'kl': 0.011216908693313599}, 'load_time_ms': 0.712, 'num_steps_sampled': 589200, 'update_time_ms': 2.459}",491,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.163708209991455,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,589200,589200,{},491,84,-85.10439443954823,2025-09-04_21-25-00,8.000032190182104,3651948,1757013900,-13.215847781637063,18722.966340780258,20535,14.19
+cda-server-2,False,18764.273517370224,"{'sample_time_ms': 41606.297, 'num_steps_trained': 590400, 'grad_time_ms': 368.711, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 333.9530944824219, 'policy_loss': -0.12836137413978577, 'vf_explained_var': 0.023143529891967773, 'entropy': 8.049270629882812, 'cur_lr': 4.999999873689376e-05, 'total_loss': 333.8704528808594, 'kl': 0.013377728872001171}, 'load_time_ms': 0.704, 'num_steps_sampled': 590400, 'update_time_ms': 2.461}",492,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.30717658996582,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,590400,590400,{},492,75,-90.14017845056321,2025-09-04_21-25-41,8.000000400039598,3651948,1757013941,-15.350974614268544,18764.273517370224,20610,15.16
+cda-server-2,False,18805.587491750717,"{'sample_time_ms': 41481.795, 'num_steps_trained': 591600, 'grad_time_ms': 368.342, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 322.06427001953125, 'policy_loss': -0.14064206182956696, 'vf_explained_var': 0.023292958736419678, 'entropy': 7.97336483001709, 'cur_lr': 4.999999873689376e-05, 'total_loss': 321.9604187011719, 'kl': 0.010765206068754196}, 'load_time_ms': 0.716, 'num_steps_sampled': 591600, 'update_time_ms': 2.462}",493,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.313974380493164,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,591600,591600,{},493,80,-86.66675106723396,2025-09-04_21-26-22,8.000050889980269,3651948,1757013982,-15.626723614996253,18805.587491750717,20690,15.24
+cda-server-2,False,18847.166501522064,"{'sample_time_ms': 41501.811, 'num_steps_trained': 592800, 'grad_time_ms': 368.62, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 320.3753662109375, 'policy_loss': -0.15322066843509674, 'vf_explained_var': 0.052800972014665604, 'entropy': 8.03237533569336, 'cur_lr': 4.999999873689376e-05, 'total_loss': 320.260009765625, 'kl': 0.011088002473115921}, 'load_time_ms': 0.714, 'num_steps_sampled': 592800, 'update_time_ms': 2.497}",494,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.579009771347046,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,592800,592800,{},494,76,-86.6857093331291,2025-09-04_21-27-04,8.000071705452596,3651948,1757014024,-13.958478779205585,18847.166501522064,20766,14.54
+cda-server-2,False,18890.30220270157,"{'sample_time_ms': 41586.374, 'num_steps_trained': 594000, 'grad_time_ms': 370.329, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 176.98995971679688, 'policy_loss': -0.1451566517353058, 'vf_explained_var': 0.05666474997997284, 'entropy': 7.881124019622803, 'cur_lr': 4.999999873689376e-05, 'total_loss': 176.88229370117188, 'kl': 0.010970203205943108}, 'load_time_ms': 0.714, 'num_steps_sampled': 594000, 'update_time_ms': 2.497}",495,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.135701179504395,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,594000,594000,{},495,101,-83.68284583938473,2025-09-04_21-27-47,8.000077517396324,3651948,1757014067,-10.270964668990766,18890.30220270157,20867,12.465346534653465
+cda-server-2,False,18931.804030179977,"{'sample_time_ms': 41515.342, 'num_steps_trained': 595200, 'grad_time_ms': 370.796, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 217.40060424804688, 'policy_loss': -0.1363876461982727, 'vf_explained_var': 0.04078206792473793, 'entropy': 8.477455139160156, 'cur_lr': 4.999999873689376e-05, 'total_loss': 217.304443359375, 'kl': 0.011769948527216911}, 'load_time_ms': 0.719, 'num_steps_sampled': 595200, 'update_time_ms': 2.488}",496,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.50182747840881,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,595200,595200,{},496,87,-81.89818780485777,2025-09-04_21-28-28,8.000077897314387,3651948,1757014108,-11.506748530268467,18931.804030179977,20954,13.3
+cda-server-2,False,18973.457051038742,"{'sample_time_ms': 41494.912, 'num_steps_trained': 596400, 'grad_time_ms': 370.361, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 191.2285919189453, 'policy_loss': -0.142458975315094, 'vf_explained_var': 0.02932678908109665, 'entropy': 7.927573204040527, 'cur_lr': 4.999999873689376e-05, 'total_loss': 191.1237030029297, 'kl': 0.010983546264469624}, 'load_time_ms': 0.705, 'num_steps_sampled': 596400, 'update_time_ms': 2.547}",497,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.65302085876465,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,596400,596400,{},497,87,-85.96878336102286,2025-09-04_21-29-10,8.000152657205481,3651948,1757014150,-12.040197633921432,18973.457051038742,21041,13.56
+cda-server-2,False,19015.47111916542,"{'sample_time_ms': 41533.256, 'num_steps_trained': 597600, 'grad_time_ms': 369.677, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 171.41409301757812, 'policy_loss': -0.13801419734954834, 'vf_explained_var': 0.07307276874780655, 'entropy': 7.996822834014893, 'cur_lr': 4.999999873689376e-05, 'total_loss': 171.31585693359375, 'kl': 0.011637212708592415}, 'load_time_ms': 0.69, 'num_steps_sampled': 597600, 'update_time_ms': 2.56}",498,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.01406812667847,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,597600,597600,{},498,97,-81.78525146421343,2025-09-04_21-29-52,8.00011910772374,3651948,1757014192,-10.233689571593073,19015.47111916542,21138,12.48
+cda-server-2,False,19057.868161678314,"{'sample_time_ms': 41469.932, 'num_steps_trained': 598800, 'grad_time_ms': 371.355, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 203.18177795410156, 'policy_loss': -0.14015284180641174, 'vf_explained_var': 0.07815537601709366, 'entropy': 8.00692367553711, 'cur_lr': 4.999999873689376e-05, 'total_loss': 203.07766723632812, 'kl': 0.01054619625210762}, 'load_time_ms': 0.688, 'num_steps_sampled': 598800, 'update_time_ms': 2.57}",499,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.39704251289368,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,598800,598800,{},499,98,-84.37047594622636,2025-09-04_21-30-35,8.000067198878867,3651948,1757014235,-9.81541174713408,19057.868161678314,21236,12.29
+cda-server-2,False,19100.248570919037,"{'sample_time_ms': 41565.004, 'num_steps_trained': 600000, 'grad_time_ms': 371.225, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 228.5944061279297, 'policy_loss': -0.13729991018772125, 'vf_explained_var': 0.06894998252391815, 'entropy': 8.258490562438965, 'cur_lr': 4.999999873689376e-05, 'total_loss': 228.5061798095703, 'kl': 0.014364050701260567}, 'load_time_ms': 0.68, 'num_steps_sampled': 600000, 'update_time_ms': 2.607}",500,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.380409240722656,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,600000,600000,{},500,107,-85.03451509587148,2025-09-04_21-31-17,8.000095531447425,3651948,1757014277,-8.105283058888311,19100.248570919037,21343,11.233644859813085
+cda-server-2,False,19142.855487585068,"{'sample_time_ms': 41607.908, 'num_steps_trained': 601200, 'grad_time_ms': 372.662, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 235.8614044189453, 'policy_loss': -0.14118488132953644, 'vf_explained_var': 0.0441315695643425, 'entropy': 7.8184638023376465, 'cur_lr': 4.999999873689376e-05, 'total_loss': 235.7564239501953, 'kl': 0.010595940053462982}, 'load_time_ms': 0.687, 'num_steps_sampled': 601200, 'update_time_ms': 2.622}",501,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.606916666030884,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,601200,601200,{},501,102,-83.40710671169376,2025-09-04_21-32-00,8.000040356716635,3651948,1757014320,-8.77457060067311,19142.855487585068,21445,11.647058823529411
+cda-server-2,False,19185.00794363022,"{'sample_time_ms': 41693.855, 'num_steps_trained': 602400, 'grad_time_ms': 371.213, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 171.4715118408203, 'policy_loss': -0.1402779221534729, 'vf_explained_var': 0.05734093859791756, 'entropy': 8.267672538757324, 'cur_lr': 4.999999873689376e-05, 'total_loss': 171.36724853515625, 'kl': 0.010545175522565842}, 'load_time_ms': 0.687, 'num_steps_sampled': 602400, 'update_time_ms': 2.637}",502,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.15245604515076,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,602400,602400,{},502,92,-79.99049099733877,2025-09-04_21-32-42,8.000000510065377,3651948,1757014362,-11.151294582043896,19185.00794363022,21537,13.07
+cda-server-2,False,19226.318054676056,"{'sample_time_ms': 41693.802, 'num_steps_trained': 603600, 'grad_time_ms': 370.91, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 243.47238159179688, 'policy_loss': -0.14067596197128296, 'vf_explained_var': 0.0448896624147892, 'entropy': 7.9817070960998535, 'cur_lr': 4.999999873689376e-05, 'total_loss': 243.37454223632812, 'kl': 0.012534737586975098}, 'load_time_ms': 0.685, 'num_steps_sampled': 603600, 'update_time_ms': 2.64}",503,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.3101110458374,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,603600,603600,{},503,82,-85.32329285777173,2025-09-04_21-33-23,8.000089403141466,3651948,1757014403,-12.466998373239223,19226.318054676056,21619,13.69
+cda-server-2,False,19268.337609052658,"{'sample_time_ms': 41735.891, 'num_steps_trained': 604800, 'grad_time_ms': 372.908, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 214.1730194091797, 'policy_loss': -0.1486336588859558, 'vf_explained_var': 0.058973655104637146, 'entropy': 7.902002334594727, 'cur_lr': 4.999999873689376e-05, 'total_loss': 214.05941772460938, 'kl': 0.01025424711406231}, 'load_time_ms': 0.687, 'num_steps_sampled': 604800, 'update_time_ms': 2.584}",504,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.01955437660217,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,604800,604800,{},504,88,-85.55724740222443,2025-09-04_21-34-05,8.000128399267489,3651948,1757014445,-11.976474986293942,19268.337609052658,21707,13.49
+cda-server-2,False,19310.27505350113,"{'sample_time_ms': 41617.261, 'num_steps_trained': 606000, 'grad_time_ms': 371.731, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 231.47146606445312, 'policy_loss': -0.14275382459163666, 'vf_explained_var': 0.06723830848932266, 'entropy': 7.642672061920166, 'cur_lr': 4.999999873689376e-05, 'total_loss': 231.36834716796875, 'kl': 0.01160765066742897}, 'load_time_ms': 0.675, 'num_steps_sampled': 606000, 'update_time_ms': 2.58}",505,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.93744444847107,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,606000,606000,{},505,99,-85.36671799581443,2025-09-04_21-34-47,8.000237627338208,3651948,1757014487,-9.127520334398966,19310.27505350113,21806,11.79
+cda-server-2,False,19351.926946878433,"{'sample_time_ms': 41632.964, 'num_steps_trained': 607200, 'grad_time_ms': 371.038, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 272.1002502441406, 'policy_loss': -0.14265893399715424, 'vf_explained_var': 0.05283678323030472, 'entropy': 8.153338432312012, 'cur_lr': 4.999999873689376e-05, 'total_loss': 271.9993896484375, 'kl': 0.012219791300594807}, 'load_time_ms': 0.672, 'num_steps_sampled': 607200, 'update_time_ms': 2.606}",506,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.65189337730408,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,607200,607200,{},506,80,-83.23732806632651,2025-09-04_21-35-29,8.000237627338208,3651948,1757014529,-13.198573957128403,19351.926946878433,21886,14.29
+cda-server-2,False,19394.843361377716,"{'sample_time_ms': 41761.77, 'num_steps_trained': 608400, 'grad_time_ms': 368.611, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 275.6209716796875, 'policy_loss': -0.14859682321548462, 'vf_explained_var': 0.04044681042432785, 'entropy': 7.9257965087890625, 'cur_lr': 4.999999873689376e-05, 'total_loss': 275.51068115234375, 'kl': 0.011200634762644768}, 'load_time_ms': 0.676, 'num_steps_sampled': 608400, 'update_time_ms': 2.591}",507,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.91641449928284,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,608400,608400,{},507,87,-85.27599767400417,2025-09-04_21-36-12,8.0000977022113,3651948,1757014572,-12.962113395581076,19394.843361377716,21973,14.0
+cda-server-2,False,19437.19703555107,"{'sample_time_ms': 41795.476, 'num_steps_trained': 609600, 'grad_time_ms': 368.913, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 225.57896423339844, 'policy_loss': -0.15414175391197205, 'vf_explained_var': 0.04575726017355919, 'entropy': 7.972322463989258, 'cur_lr': 4.999999873689376e-05, 'total_loss': 225.46044921875, 'kl': 0.010422691702842712}, 'load_time_ms': 0.672, 'num_steps_sampled': 609600, 'update_time_ms': 2.549}",508,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.3536741733551,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,609600,609600,{},508,91,-83.26347834598693,2025-09-04_21-36-54,8.00026367214025,3651948,1757014614,-10.85026440743498,19437.19703555107,22064,12.9
+cda-server-2,False,19478.833067178726,"{'sample_time_ms': 41719.28, 'num_steps_trained': 610800, 'grad_time_ms': 368.994, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 202.0901641845703, 'policy_loss': -0.14258527755737305, 'vf_explained_var': 0.04914379119873047, 'entropy': 7.961720943450928, 'cur_lr': 4.999999873689376e-05, 'total_loss': 201.99142456054688, 'kl': 0.012831298634409904}, 'load_time_ms': 0.672, 'num_steps_sampled': 610800, 'update_time_ms': 2.583}",509,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.63603162765503,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,610800,610800,{},509,86,-84.14968091451667,2025-09-04_21-37-36,8.000110315983827,3651948,1757014656,-13.132585803095791,19478.833067178726,22150,14.16
+cda-server-2,False,19521.673230409622,"{'sample_time_ms': 41763.259, 'num_steps_trained': 612000, 'grad_time_ms': 370.994, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 169.8279266357422, 'policy_loss': -0.16057568788528442, 'vf_explained_var': 0.05376378074288368, 'entropy': 8.064464569091797, 'cur_lr': 4.999999873689376e-05, 'total_loss': 169.70916748046875, 'kl': 0.012240959331393242}, 'load_time_ms': 0.687, 'num_steps_sampled': 612000, 'update_time_ms': 2.56}",510,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.840163230895996,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,612000,612000,{},510,94,-55.10036498289578,2025-09-04_21-38-19,8.000166144923092,3651948,1757014699,-9.811099317125645,19521.673230409622,22244,12.41
+cda-server-2,False,19564.705486774445,"{'sample_time_ms': 41806.25, 'num_steps_trained': 613200, 'grad_time_ms': 370.523, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 187.50811767578125, 'policy_loss': -0.1564158797264099, 'vf_explained_var': 0.06112748384475708, 'entropy': 7.912282943725586, 'cur_lr': 4.999999873689376e-05, 'total_loss': 187.3908233642578, 'kl': 0.011446958407759666}, 'load_time_ms': 0.69, 'num_steps_sampled': 613200, 'update_time_ms': 2.52}",511,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.03225636482239,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,613200,613200,{},511,95,-82.31459558586661,2025-09-04_21-39-02,8.00001957593356,3651948,1757014742,-10.898860485843786,19564.705486774445,22339,12.88
+cda-server-2,False,19607.551896333694,"{'sample_time_ms': 41875.607, 'num_steps_trained': 614400, 'grad_time_ms': 370.587, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 234.76763916015625, 'policy_loss': -0.13921450078487396, 'vf_explained_var': 0.0512005016207695, 'entropy': 7.785059452056885, 'cur_lr': 4.999999873689376e-05, 'total_loss': 234.6659393310547, 'kl': 0.010978585109114647}, 'load_time_ms': 0.69, 'num_steps_sampled': 614400, 'update_time_ms': 2.515}",512,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.84640955924988,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,614400,614400,{},512,92,-86.59427325417099,2025-09-04_21-39-45,8.000028056359978,3651948,1757014785,-11.19351792817233,19607.551896333694,22431,12.97
+cda-server-2,False,19649.291892528534,"{'sample_time_ms': 41920.281, 'num_steps_trained': 615600, 'grad_time_ms': 368.975, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 283.98846435546875, 'policy_loss': -0.14622753858566284, 'vf_explained_var': 0.05408765748143196, 'entropy': 8.071061134338379, 'cur_lr': 4.999999873689376e-05, 'total_loss': 283.8768005371094, 'kl': 0.010110199451446533}, 'load_time_ms': 0.683, 'num_steps_sampled': 615600, 'update_time_ms': 2.486}",513,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.73999619483948,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,615600,615600,{},513,82,-88.18906040253121,2025-09-04_21-40-26,8.000000799052916,3651948,1757014826,-14.583852783262136,19649.291892528534,22513,14.93
+cda-server-2,False,19691.59937596321,"{'sample_time_ms': 41950.944, 'num_steps_trained': 616800, 'grad_time_ms': 367.111, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 167.95440673828125, 'policy_loss': -0.14656759798526764, 'vf_explained_var': 0.05410350114107132, 'entropy': 7.895658016204834, 'cur_lr': 4.999999873689376e-05, 'total_loss': 167.83981323242188, 'kl': 0.009352294728159904}, 'load_time_ms': 0.679, 'num_steps_sampled': 616800, 'update_time_ms': 2.512}",514,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.307483434677124,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,616800,616800,{},514,102,-65.38370327928517,2025-09-04_21-41-09,8.000010020065147,3651948,1757014869,-8.787935336787926,19691.59937596321,22615,11.705882352941176
+cda-server-2,False,19734.171117067337,"{'sample_time_ms': 42012.093, 'num_steps_trained': 618000, 'grad_time_ms': 369.426, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 203.5995330810547, 'policy_loss': -0.13262474536895752, 'vf_explained_var': 0.05613193288445473, 'entropy': 7.793171405792236, 'cur_lr': 4.999999873689376e-05, 'total_loss': 203.51361083984375, 'kl': 0.013668078929185867}, 'load_time_ms': 0.686, 'num_steps_sampled': 618000, 'update_time_ms': 2.548}",515,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.57174110412598,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,618000,618000,{},515,96,-82.70095046531756,2025-09-04_21-41-51,8.000124115491843,3651948,1757014911,-10.216002144067112,19734.171117067337,22711,12.5
+cda-server-2,False,19777.31126642227,"{'sample_time_ms': 42162.224, 'num_steps_trained': 619200, 'grad_time_ms': 368.153, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 198.68246459960938, 'policy_loss': -0.15813376009464264, 'vf_explained_var': 0.041515424847602844, 'entropy': 8.062461853027344, 'cur_lr': 4.999999873689376e-05, 'total_loss': 198.56333923339844, 'kl': 0.011416195891797543}, 'load_time_ms': 0.687, 'num_steps_sampled': 619200, 'update_time_ms': 2.538}",516,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.14014935493469,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,619200,619200,{},516,90,-71.87562082100415,2025-09-04_21-42-34,10.0,3651948,1757014954,-11.338310907119634,19777.31126642227,22801,13.24
+cda-server-2,False,19818.962617635727,"{'sample_time_ms': 42036.298, 'num_steps_trained': 620400, 'grad_time_ms': 367.631, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 229.8743438720703, 'policy_loss': -0.15090160071849823, 'vf_explained_var': 0.060195956379175186, 'entropy': 7.647896766662598, 'cur_lr': 4.999999873689376e-05, 'total_loss': 229.7586212158203, 'kl': 0.010290677659213543}, 'load_time_ms': 0.679, 'num_steps_sampled': 620400, 'update_time_ms': 2.499}",517,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.6513512134552,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,620400,620400,{},517,83,-86.42919798304267,2025-09-04_21-43-16,8.000240581087791,3651948,1757014996,-12.626468233448689,19818.962617635727,22884,13.76
+cda-server-2,False,19860.863934993744,"{'sample_time_ms': 41992.832, 'num_steps_trained': 621600, 'grad_time_ms': 365.9, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 263.2057189941406, 'policy_loss': -0.1419185847043991, 'vf_explained_var': 0.06125001236796379, 'entropy': 7.386011600494385, 'cur_lr': 4.999999873689376e-05, 'total_loss': 263.1047058105469, 'kl': 0.011972821317613125}, 'load_time_ms': 0.683, 'num_steps_sampled': 621600, 'update_time_ms': 2.5}",518,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.90131735801697,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,621600,621600,{},518,93,-90.94633259277349,2025-09-04_21-43-58,8.000256359887995,3651948,1757015038,-11.775377329612343,19860.863934993744,22977,13.18
+cda-server-2,False,19904.311608552933,"{'sample_time_ms': 42174.543, 'num_steps_trained': 622800, 'grad_time_ms': 365.422, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 214.76736450195312, 'policy_loss': -0.14149066805839539, 'vf_explained_var': 0.05032728239893913, 'entropy': 7.764161109924316, 'cur_lr': 4.999999873689376e-05, 'total_loss': 214.66229248046875, 'kl': 0.010659274645149708}, 'load_time_ms': 0.686, 'num_steps_sampled': 622800, 'update_time_ms': 2.46}",519,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.44767355918884,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,622800,622800,{},519,100,-82.56335530104904,2025-09-04_21-44-41,8.000054979352926,3651948,1757015081,-9.432898509410775,19904.311608552933,23077,12.04
+cda-server-2,False,19946.00028705597,"{'sample_time_ms': 42059.589, 'num_steps_trained': 624000, 'grad_time_ms': 365.233, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 219.71600341796875, 'policy_loss': -0.1405024528503418, 'vf_explained_var': 0.05571660026907921, 'entropy': 8.026354789733887, 'cur_lr': 4.999999873689376e-05, 'total_loss': 219.6128387451172, 'kl': 0.010930254124104977}, 'load_time_ms': 0.68, 'num_steps_sampled': 624000, 'update_time_ms': 2.458}",520,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.6886785030365,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,624000,624000,{},520,94,-68.88135622270947,2025-09-04_21-45-23,8.001130149147663,3651948,1757015123,-10.280514974476594,19946.00028705597,23171,12.65
+cda-server-2,False,19988.20765209198,"{'sample_time_ms': 41976.038, 'num_steps_trained': 625200, 'grad_time_ms': 366.25, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 236.67010498046875, 'policy_loss': -0.13375553488731384, 'vf_explained_var': 0.05492662638425827, 'entropy': 7.896309852600098, 'cur_lr': 4.999999873689376e-05, 'total_loss': 236.57696533203125, 'kl': 0.011888116598129272}, 'load_time_ms': 0.673, 'num_steps_sampled': 625200, 'update_time_ms': 2.556}",521,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.20736503601074,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,625200,625200,{},521,103,-86.73095527556067,2025-09-04_21-46-05,8.000027274938793,3651948,1757015165,-8.453045250321795,19988.20765209198,23274,11.427184466019417
+cda-server-2,False,20030.421869277954,"{'sample_time_ms': 41910.561, 'num_steps_trained': 626400, 'grad_time_ms': 368.48, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 201.26467895507812, 'policy_loss': -0.14536257088184357, 'vf_explained_var': 0.054645001888275146, 'entropy': 8.036338806152344, 'cur_lr': 4.999999873689376e-05, 'total_loss': 201.15565490722656, 'kl': 0.010635611601173878}, 'load_time_ms': 0.672, 'num_steps_sampled': 626400, 'update_time_ms': 2.566}",522,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.21421718597412,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,626400,626400,{},522,91,-82.13765561179979,2025-09-04_21-46-48,8.000044330717463,3651948,1757015208,-11.049306426188457,20030.421869277954,23365,13.09
+cda-server-2,False,20073.167887210846,"{'sample_time_ms': 42010.584, 'num_steps_trained': 627600, 'grad_time_ms': 368.963, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 190.387939453125, 'policy_loss': -0.1575956642627716, 'vf_explained_var': 0.05341558903455734, 'entropy': 7.737266540527344, 'cur_lr': 4.999999873689376e-05, 'total_loss': 190.26390075683594, 'kl': 0.009819424711167812}, 'load_time_ms': 0.676, 'num_steps_sampled': 627600, 'update_time_ms': 2.583}",523,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.746017932891846,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,627600,627600,{},523,108,-87.63316378268442,2025-09-04_21-47-30,8.000067181155115,3651948,1757015250,-8.18975483820423,20073.167887210846,23473,11.24074074074074
+cda-server-2,False,20115.03004169464,"{'sample_time_ms': 41966.073, 'num_steps_trained': 628800, 'grad_time_ms': 368.868, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 265.75616455078125, 'policy_loss': -0.1417827606201172, 'vf_explained_var': 0.047600045800209045, 'entropy': 7.7846832275390625, 'cur_lr': 4.999999873689376e-05, 'total_loss': 265.66131591796875, 'kl': 0.013741587288677692}, 'load_time_ms': 0.676, 'num_steps_sampled': 628800, 'update_time_ms': 2.603}",524,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.862154483795166,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,628800,628800,{},524,92,-86.3289429558955,2025-09-04_21-48-12,8.000313690816663,3651948,1757015292,-11.135327043898128,20115.03004169464,23565,12.95
+cda-server-2,False,20157.006575107574,"{'sample_time_ms': 41907.452, 'num_steps_trained': 630000, 'grad_time_ms': 367.909, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 178.26414489746094, 'policy_loss': -0.15132449567317963, 'vf_explained_var': 0.04761023074388504, 'entropy': 7.840459823608398, 'cur_lr': 4.999999873689376e-05, 'total_loss': 178.15179443359375, 'kl': 0.011396270245313644}, 'load_time_ms': 0.676, 'num_steps_sampled': 630000, 'update_time_ms': 2.595}",525,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.97653341293335,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,630000,630000,{},525,103,-66.89724537728738,2025-09-04_21-48-54,8.000001494058395,3651948,1757015334,-8.953779715239303,20157.006575107574,23668,11.815533980582524
+cda-server-2,False,20198.9055352211,"{'sample_time_ms': 41783.106, 'num_steps_trained': 631200, 'grad_time_ms': 368.134, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 216.79884338378906, 'policy_loss': -0.14230404794216156, 'vf_explained_var': 0.04844113066792488, 'entropy': 8.409723281860352, 'cur_lr': 4.999999873689376e-05, 'total_loss': 216.6897430419922, 'kl': 0.009710962884128094}, 'load_time_ms': 0.671, 'num_steps_sampled': 631200, 'update_time_ms': 2.611}",526,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.89896011352539,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,631200,631200,{},526,88,-83.77667437844606,2025-09-04_21-49-36,8.00013842138809,3651948,1757015376,-10.576268163691191,20198.9055352211,23756,12.89
+cda-server-2,False,20243.040204524994,"{'sample_time_ms': 42030.599, 'num_steps_trained': 632400, 'grad_time_ms': 368.897, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 113.05033111572266, 'policy_loss': -0.15707647800445557, 'vf_explained_var': 0.07800179719924927, 'entropy': 7.7117438316345215, 'cur_lr': 4.999999873689376e-05, 'total_loss': 112.93242645263672, 'kl': 0.01146283932030201}, 'load_time_ms': 0.671, 'num_steps_sampled': 632400, 'update_time_ms': 2.649}",527,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",44.13466930389404,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,632400,632400,{},527,101,-83.94824658273092,2025-09-04_21-50-20,8.000012988207692,3651948,1757015420,-9.315383407119686,20243.040204524994,23857,12.07920792079208
+cda-server-2,False,20285.569366931915,"{'sample_time_ms': 42090.799, 'num_steps_trained': 633600, 'grad_time_ms': 371.397, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 167.38116455078125, 'policy_loss': -0.15174798667430878, 'vf_explained_var': 0.06229621171951294, 'entropy': 7.947242736816406, 'cur_lr': 4.999999873689376e-05, 'total_loss': 167.26669311523438, 'kl': 0.010910822078585625}, 'load_time_ms': 0.677, 'num_steps_sampled': 633600, 'update_time_ms': 2.661}",528,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.52916240692139,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,633600,633600,{},528,99,-70.66269968829573,2025-09-04_21-51-03,8.00021981389855,3651948,1757015463,-9.506490660360424,20285.569366931915,23956,12.24
+cda-server-2,False,20327.706107139587,"{'sample_time_ms': 41959.027, 'num_steps_trained': 634800, 'grad_time_ms': 371.992, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 240.8870391845703, 'policy_loss': -0.14231714606285095, 'vf_explained_var': 0.03474872559309006, 'entropy': 8.10819149017334, 'cur_lr': 4.999999873689376e-05, 'total_loss': 240.78103637695312, 'kl': 0.010623510926961899}, 'load_time_ms': 0.691, 'num_steps_sampled': 634800, 'update_time_ms': 2.701}",529,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.13674020767212,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,634800,634800,{},529,91,-85.239240953459,2025-09-04_21-51-45,8.000204314450299,3651948,1757015505,-10.078748363195434,20327.706107139587,24047,12.44
+cda-server-2,False,20369.63042449951,"{'sample_time_ms': 41982.939, 'num_steps_trained': 636000, 'grad_time_ms': 371.612, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 206.69680786132812, 'policy_loss': -0.14226245880126953, 'vf_explained_var': 0.07164441049098969, 'entropy': 7.701772689819336, 'cur_lr': 4.999999873689376e-05, 'total_loss': 206.5907440185547, 'kl': 0.010596277192234993}, 'load_time_ms': 0.681, 'num_steps_sampled': 636000, 'update_time_ms': 2.716}",530,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.924317359924316,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,636000,636000,{},530,96,-66.25400937910553,2025-09-04_21-52-27,8.000051483620119,3651948,1757015547,-10.437434672321645,20369.63042449951,24143,12.75
+cda-server-2,False,20411.682535409927,"{'sample_time_ms': 41968.271, 'num_steps_trained': 637200, 'grad_time_ms': 370.594, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 183.31382751464844, 'policy_loss': -0.14274880290031433, 'vf_explained_var': 0.07200777530670166, 'entropy': 8.17143440246582, 'cur_lr': 4.999999873689376e-05, 'total_loss': 183.21058654785156, 'kl': 0.011561138555407524}, 'load_time_ms': 0.69, 'num_steps_sampled': 637200, 'update_time_ms': 2.834}",531,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.05211091041565,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,637200,637200,{},531,96,-84.72100763733563,2025-09-04_21-53-09,8.000220490082432,3651948,1757015589,-11.260624423523332,20411.682535409927,24239,13.16
+cda-server-2,False,20453.583587169647,"{'sample_time_ms': 41938.881, 'num_steps_trained': 638400, 'grad_time_ms': 368.67, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 160.25613403320312, 'policy_loss': -0.13671061396598816, 'vf_explained_var': 0.04929812252521515, 'entropy': 7.601922988891602, 'cur_lr': 4.999999873689376e-05, 'total_loss': 160.16278076171875, 'kl': 0.012693586759269238}, 'load_time_ms': 0.688, 'num_steps_sampled': 638400, 'update_time_ms': 2.798}",532,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.90105175971985,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,638400,638400,{},532,98,-55.67049051819933,2025-09-04_21-53-51,8.000001499128857,3651948,1757015631,-9.812918485032277,20453.583587169647,24337,12.38
+cda-server-2,False,20495.79337143898,"{'sample_time_ms': 41884.443, 'num_steps_trained': 639600, 'grad_time_ms': 369.485, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 220.09242248535156, 'policy_loss': -0.14294356107711792, 'vf_explained_var': 0.05856436491012573, 'entropy': 7.793384075164795, 'cur_lr': 4.999999873689376e-05, 'total_loss': 219.98651123046875, 'kl': 0.01083542313426733}, 'load_time_ms': 0.702, 'num_steps_sampled': 639600, 'update_time_ms': 2.842}",533,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.209784269332886,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,639600,639600,{},533,89,-85.23888771043163,2025-09-04_21-54-33,8.000471842720263,3651948,1757015673,-12.360378156761135,20495.79337143898,24426,13.8
+cda-server-2,False,20538.58450126648,"{'sample_time_ms': 41975.985, 'num_steps_trained': 640800, 'grad_time_ms': 370.911, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 210.44073486328125, 'policy_loss': -0.12847492098808289, 'vf_explained_var': 0.05711045488715172, 'entropy': 7.832577228546143, 'cur_lr': 4.999999873689376e-05, 'total_loss': 210.3465118408203, 'kl': 0.01003289595246315}, 'load_time_ms': 0.702, 'num_steps_sampled': 640800, 'update_time_ms': 2.796}",534,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.79112982749939,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,640800,640800,{},534,114,-85.3605670180534,2025-09-04_21-55-16,8.000115383356453,3651948,1757015716,-6.244545209865517,20538.58450126648,24540,10.263157894736842
+cda-server-2,False,20581.17125558853,"{'sample_time_ms': 42036.439, 'num_steps_trained': 642000, 'grad_time_ms': 371.452, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 180.33758544921875, 'policy_loss': -0.14919494092464447, 'vf_explained_var': 0.08063942939043045, 'entropy': 7.371814250946045, 'cur_lr': 4.999999873689376e-05, 'total_loss': 180.2249298095703, 'kl': 0.010697085410356522}, 'load_time_ms': 0.703, 'num_steps_sampled': 642000, 'update_time_ms': 2.778}",535,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.586754322052,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,642000,642000,{},535,105,-82.15509683219827,2025-09-04_21-55-59,8.000187407552426,3651948,1757015759,-8.857732547181975,20581.17125558853,24645,11.676190476190476
+cda-server-2,False,20623.618741750717,"{'sample_time_ms': 42088.729, 'num_steps_trained': 643200, 'grad_time_ms': 373.929, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 193.4102020263672, 'policy_loss': -0.14119069278240204, 'vf_explained_var': 0.07007483392953873, 'entropy': 7.85939359664917, 'cur_lr': 4.999999873689376e-05, 'total_loss': 193.30856323242188, 'kl': 0.011583573184907436}, 'load_time_ms': 0.717, 'num_steps_sampled': 643200, 'update_time_ms': 2.767}",536,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.44748616218567,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,643200,643200,{},536,107,-73.07308658846257,2025-09-04_21-56-41,8.000246705335524,3651948,1757015801,-7.952332301154359,20623.618741750717,24752,11.22429906542056
+cda-server-2,False,20666.022399425507,"{'sample_time_ms': 41913.773, 'num_steps_trained': 644400, 'grad_time_ms': 375.822, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 183.60745239257812, 'policy_loss': -0.14092332124710083, 'vf_explained_var': 0.07595738768577576, 'entropy': 7.580621719360352, 'cur_lr': 4.999999873689376e-05, 'total_loss': 183.50210571289062, 'kl': 0.010412870906293392}, 'load_time_ms': 0.724, 'num_steps_sampled': 644400, 'update_time_ms': 2.734}",537,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.40365767478943,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,644400,644400,{},537,102,-76.6381068952044,2025-09-04_21-57-24,8.000060991204217,3651948,1757015844,-9.224174098292405,20666.022399425507,24854,11.872549019607844
+cda-server-2,False,20708.202874422073,"{'sample_time_ms': 41878.127, 'num_steps_trained': 645600, 'grad_time_ms': 376.595, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 150.78045654296875, 'policy_loss': -0.14036937057971954, 'vf_explained_var': 0.0604521706700325, 'entropy': 8.07420539855957, 'cur_lr': 4.999999873689376e-05, 'total_loss': 150.67779541015625, 'kl': 0.011033102869987488}, 'load_time_ms': 0.725, 'num_steps_sampled': 645600, 'update_time_ms': 2.751}",538,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.18047499656677,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,645600,645600,{},538,93,-64.0044632570487,2025-09-04_21-58-06,8.000109288083836,3651948,1757015886,-10.368055644929912,20708.202874422073,24947,12.76
+cda-server-2,False,20751.16945695877,"{'sample_time_ms': 41962.579, 'num_steps_trained': 646800, 'grad_time_ms': 375.167, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 250.36949157714844, 'policy_loss': -0.1359543353319168, 'vf_explained_var': 0.06793617457151413, 'entropy': 7.885825157165527, 'cur_lr': 4.999999873689376e-05, 'total_loss': 250.27215576171875, 'kl': 0.011291067115962505}, 'load_time_ms': 0.713, 'num_steps_sampled': 646800, 'update_time_ms': 2.763}",539,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.96658253669739,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,646800,646800,{},539,106,-83.60032782351799,2025-09-04_21-58-49,8.000732029336234,3651948,1757015929,-8.182262984756873,20751.16945695877,25053,11.273584905660377
+cda-server-2,False,20793.905032873154,"{'sample_time_ms': 42044.398, 'num_steps_trained': 648000, 'grad_time_ms': 374.484, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 118.68406677246094, 'policy_loss': -0.15886139869689941, 'vf_explained_var': 0.07198840379714966, 'entropy': 8.058353424072266, 'cur_lr': 4.999999873689376e-05, 'total_loss': 118.56088256835938, 'kl': 0.010437482967972755}, 'load_time_ms': 0.715, 'num_steps_sampled': 648000, 'update_time_ms': 2.739}",540,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.735575914382935,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,648000,648000,{},540,108,-51.326081797036,2025-09-04_21-59-31,8.000190443829876,3651948,1757015971,-7.674543735063982,20793.905032873154,25161,11.175925925925926
+cda-server-2,False,20835.970939397812,"{'sample_time_ms': 42044.815, 'num_steps_trained': 649200, 'grad_time_ms': 375.55, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 236.36314392089844, 'policy_loss': -0.1319597363471985, 'vf_explained_var': 0.05775655806064606, 'entropy': 7.766454219818115, 'cur_lr': 4.999999873689376e-05, 'total_loss': 236.2706756591797, 'kl': 0.01154718641191721}, 'load_time_ms': 0.725, 'num_steps_sampled': 649200, 'update_time_ms': 2.633}",541,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.0659065246582,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,649200,649200,{},541,98,-87.4248928458613,2025-09-04_22-00-14,8.000020624924144,3651948,1757016014,-9.628970222012954,20835.970939397812,25259,12.23
+cda-server-2,False,20879.48354935646,"{'sample_time_ms': 42206.986, 'num_steps_trained': 650400, 'grad_time_ms': 374.532, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 255.753662109375, 'policy_loss': -0.1421698033809662, 'vf_explained_var': 0.05161063000559807, 'entropy': 7.748743057250977, 'cur_lr': 4.999999873689376e-05, 'total_loss': 255.64759826660156, 'kl': 0.010566272772848606}, 'load_time_ms': 0.724, 'num_steps_sampled': 650400, 'update_time_ms': 2.642}",542,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.51260995864868,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,650400,650400,{},542,98,-79.70860967904791,2025-09-04_22-00-57,8.002317469381081,3651948,1757016057,-9.308876267997848,20879.48354935646,25357,11.97
+cda-server-2,False,20922.198652267456,"{'sample_time_ms': 42255.839, 'num_steps_trained': 651600, 'grad_time_ms': 376.177, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 150.107177734375, 'policy_loss': -0.1413421779870987, 'vf_explained_var': 0.05147850513458252, 'entropy': 7.405312538146973, 'cur_lr': 4.999999873689376e-05, 'total_loss': 150.0113983154297, 'kl': 0.013330676592886448}, 'load_time_ms': 0.719, 'num_steps_sampled': 651600, 'update_time_ms': 2.64}",543,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.71510291099548,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,651600,651600,{},543,107,-55.664909103574814,2025-09-04_22-01-40,8.000392496864999,3651948,1757016100,-7.928721134357941,20922.198652267456,25464,11.214953271028037
+cda-server-2,False,20964.614531993866,"{'sample_time_ms': 42218.177, 'num_steps_trained': 652800, 'grad_time_ms': 376.25, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 192.20309448242188, 'policy_loss': -0.14467547833919525, 'vf_explained_var': 0.07436954975128174, 'entropy': 7.856789588928223, 'cur_lr': 4.999999873689376e-05, 'total_loss': 192.09791564941406, 'kl': 0.01155995111912489}, 'load_time_ms': 0.725, 'num_steps_sampled': 652800, 'update_time_ms': 2.688}",544,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.41587972640991,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,652800,652800,{},544,103,-67.23049233796695,2025-09-04_22-02-22,8.000194762814644,3651948,1757016142,-8.917267175899491,20964.614531993866,25567,11.951456310679612
+cda-server-2,False,21007.03317785263,"{'sample_time_ms': 42202.123, 'num_steps_trained': 654000, 'grad_time_ms': 375.528, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 192.97984313964844, 'policy_loss': -0.1451786607503891, 'vf_explained_var': 0.04516826197504997, 'entropy': 7.384739398956299, 'cur_lr': 4.999999873689376e-05, 'total_loss': 192.8732147216797, 'kl': 0.01127211656421423}, 'load_time_ms': 0.724, 'num_steps_sampled': 654000, 'update_time_ms': 2.733}",545,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.41864585876465,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,654000,654000,{},545,100,-61.937393190048056,2025-09-04_22-03-05,8.00012882417301,3651948,1757016185,-9.143666493636106,21007.03317785263,25667,11.96
+cda-server-2,False,21048.709080934525,"{'sample_time_ms': 42126.098, 'num_steps_trained': 655200, 'grad_time_ms': 374.427, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 257.3942565917969, 'policy_loss': -0.15175125002861023, 'vf_explained_var': 0.03823421895503998, 'entropy': 7.957895278930664, 'cur_lr': 4.999999873689376e-05, 'total_loss': 257.2807312011719, 'kl': 0.011188051663339138}, 'load_time_ms': 0.712, 'num_steps_sampled': 655200, 'update_time_ms': 2.722}",546,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",41.67590308189392,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,655200,655200,{},546,90,-84.22003202697721,2025-09-04_22-03-46,8.000097196435748,3651948,1757016226,-11.166475644820652,21048.709080934525,25757,13.2
+cda-server-2,False,21091.521733522415,"{'sample_time_ms': 42165.858, 'num_steps_trained': 656400, 'grad_time_ms': 375.512, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 252.97341918945312, 'policy_loss': -0.13348990678787231, 'vf_explained_var': 0.0418059304356575, 'entropy': 7.556629657745361, 'cur_lr': 4.999999873689376e-05, 'total_loss': 252.874755859375, 'kl': 0.010195381008088589}, 'load_time_ms': 0.722, 'num_steps_sampled': 656400, 'update_time_ms': 2.746}",547,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.812652587890625,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,656400,656400,{},547,106,-84.09118564531299,2025-09-04_22-04-29,8.000027542621563,3651948,1757016269,-8.242486668822895,21091.521733522415,25863,11.320754716981131
+cda-server-2,False,21134.377695083618,"{'sample_time_ms': 42235.05, 'num_steps_trained': 657600, 'grad_time_ms': 373.911, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 178.17941284179688, 'policy_loss': -0.13027864694595337, 'vf_explained_var': 0.05408206209540367, 'entropy': 7.640100479125977, 'cur_lr': 4.999999873689376e-05, 'total_loss': 178.09715270996094, 'kl': 0.014050977304577827}, 'load_time_ms': 0.713, 'num_steps_sampled': 657600, 'update_time_ms': 2.746}",548,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.855961561203,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,657600,657600,{},548,113,-66.75009070089386,2025-09-04_22-05-12,8.000064064065864,3651948,1757016312,-6.585518287778024,21134.377695083618,25976,10.495575221238939
+cda-server-2,False,21177.441816091537,"{'sample_time_ms': 42243.892, 'num_steps_trained': 658800, 'grad_time_ms': 374.828, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 162.55364990234375, 'policy_loss': -0.13918906450271606, 'vf_explained_var': 0.0474405363202095, 'entropy': 7.54951286315918, 'cur_lr': 4.999999873689376e-05, 'total_loss': 162.45347595214844, 'kl': 0.011418992653489113}, 'load_time_ms': 0.705, 'num_steps_sampled': 658800, 'update_time_ms': 2.683}",549,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.06412100791931,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,658800,658800,{},549,115,-84.39965680099289,2025-09-04_22-05-55,8.001022642143228,3651948,1757016355,-6.673317749432404,21177.441816091537,26091,10.521739130434783
+cda-server-2,False,21219.882758378983,"{'sample_time_ms': 42214.87, 'num_steps_trained': 660000, 'grad_time_ms': 374.429, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 133.2684326171875, 'policy_loss': -0.15127120912075043, 'vf_explained_var': 0.07853060215711594, 'entropy': 7.763479232788086, 'cur_lr': 4.999999873689376e-05, 'total_loss': 133.15692138671875, 'kl': 0.01163527276366949}, 'load_time_ms': 0.707, 'num_steps_sampled': 660000, 'update_time_ms': 2.704}",550,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.44094228744507,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,660000,660000,{},550,105,-59.6595376409971,2025-09-04_22-06-38,8.00063865225193,3651948,1757016398,-8.13644679828116,21219.882758378983,26196,11.342857142857143
+cda-server-2,False,21262.44256210327,"{'sample_time_ms': 42267.126, 'num_steps_trained': 661200, 'grad_time_ms': 371.657, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 106.95640563964844, 'policy_loss': -0.1507752686738968, 'vf_explained_var': 0.0752321109175682, 'entropy': 7.224944114685059, 'cur_lr': 4.999999873689376e-05, 'total_loss': 106.84326171875, 'kl': 0.011010591872036457}, 'load_time_ms': 0.695, 'num_steps_sampled': 661200, 'update_time_ms': 2.627}",551,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.55980372428894,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,661200,661200,{},551,116,-55.20205252954118,2025-09-04_22-07-20,8.00011688316286,3651948,1757016440,-6.506058239324532,21262.44256210327,26312,10.413793103448276
+cda-server-2,False,21304.658202648163,"{'sample_time_ms': 42136.556, 'num_steps_trained': 662400, 'grad_time_ms': 372.579, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 196.15733337402344, 'policy_loss': -0.14689193665981293, 'vf_explained_var': 0.066288523375988, 'entropy': 8.02047061920166, 'cur_lr': 4.999999873689376e-05, 'total_loss': 196.051025390625, 'kl': 0.011877370066940784}, 'load_time_ms': 0.696, 'num_steps_sampled': 662400, 'update_time_ms': 2.63}",552,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.21564054489136,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,662400,662400,{},552,97,-79.67103052029469,2025-09-04_22-08-03,8.00124451505714,3651948,1757016483,-9.210449480975706,21304.658202648163,26409,12.09
+cda-server-2,False,21348.155327796936,"{'sample_time_ms': 42215.424, 'num_steps_trained': 663600, 'grad_time_ms': 371.927, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 128.71673583984375, 'policy_loss': -0.12534157931804657, 'vf_explained_var': 0.07487869262695312, 'entropy': 7.696089744567871, 'cur_lr': 4.999999873689376e-05, 'total_loss': 128.62692260742188, 'kl': 0.010400541126728058}, 'load_time_ms': 0.683, 'num_steps_sampled': 663600, 'update_time_ms': 2.619}",553,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.49712514877319,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,663600,663600,{},553,119,-83.2373949676278,2025-09-04_22-08-46,8.00022207384256,3651948,1757016526,-6.358082025069993,21348.155327796936,26528,10.352941176470589
+cda-server-2,False,21391.43908548355,"{'sample_time_ms': 42303.912, 'num_steps_trained': 664800, 'grad_time_ms': 370.249, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 211.26190185546875, 'policy_loss': -0.12464182823896408, 'vf_explained_var': 0.05803931504487991, 'entropy': 8.136405944824219, 'cur_lr': 4.999999873689376e-05, 'total_loss': 211.18016052246094, 'kl': 0.012556111440062523}, 'load_time_ms': 0.693, 'num_steps_sampled': 664800, 'update_time_ms': 2.612}",554,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.28375768661499,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,664800,664800,{},554,107,-83.01182986977857,2025-09-04_22-09-29,8.000725324496882,3651948,1757016569,-7.971736350739526,21391.43908548355,26635,11.261682242990654
+cda-server-2,False,21434.582239627838,"{'sample_time_ms': 42376.524, 'num_steps_trained': 666000, 'grad_time_ms': 370.143, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 125.87883758544922, 'policy_loss': -0.14883939921855927, 'vf_explained_var': 0.07278783619403839, 'entropy': 7.4547247886657715, 'cur_lr': 4.999999873689376e-05, 'total_loss': 125.76923370361328, 'kl': 0.011482727713882923}, 'load_time_ms': 0.693, 'num_steps_sampled': 666000, 'update_time_ms': 2.578}",555,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.14315414428711,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,666000,666000,{},555,124,-53.88776320042786,2025-09-04_22-10-13,8.000704435079951,3651948,1757016613,-5.172610377890699,21434.582239627838,26759,9.661290322580646
+cda-server-2,False,21477.246037244797,"{'sample_time_ms': 42477.191, 'num_steps_trained': 667200, 'grad_time_ms': 368.311, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 143.22914123535156, 'policy_loss': -0.13829833269119263, 'vf_explained_var': 0.09276745468378067, 'entropy': 7.771801471710205, 'cur_lr': 4.999999873689376e-05, 'total_loss': 143.12408447265625, 'kl': 0.009724327363073826}, 'load_time_ms': 0.692, 'num_steps_sampled': 667200, 'update_time_ms': 2.582}",556,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.66379761695862,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,667200,667200,{},556,104,-59.879564087317746,2025-09-04_22-10-55,8.000214842733033,3651948,1757016655,-8.070791586781151,21477.246037244797,26863,11.35576923076923
+cda-server-2,False,21519.86287546158,"{'sample_time_ms': 42459.365, 'num_steps_trained': 668400, 'grad_time_ms': 366.616, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 221.02940368652344, 'policy_loss': -0.12886519730091095, 'vf_explained_var': 0.06552401930093765, 'entropy': 7.7118377685546875, 'cur_lr': 4.999999873689376e-05, 'total_loss': 220.93482971191406, 'kl': 0.010023903101682663}, 'load_time_ms': 0.677, 'num_steps_sampled': 668400, 'update_time_ms': 2.572}",557,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.616838216781616,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,668400,668400,{},557,112,-84.4354191148986,2025-09-04_22-11-38,8.000294390878413,3651948,1757016698,-7.5135190840836215,21519.86287546158,26975,10.964285714285714
+cda-server-2,False,21561.864362716675,"{'sample_time_ms': 42373.089, 'num_steps_trained': 669600, 'grad_time_ms': 367.431, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 139.33462524414062, 'policy_loss': -0.14680179953575134, 'vf_explained_var': 0.06770966947078705, 'entropy': 7.684025287628174, 'cur_lr': 4.999999873689376e-05, 'total_loss': 139.23477172851562, 'kl': 0.013734077103435993}, 'load_time_ms': 0.679, 'num_steps_sampled': 669600, 'update_time_ms': 2.563}",558,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.001487255096436,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,669600,669600,{},558,98,-58.38346503878951,2025-09-04_22-12-20,8.000045804554109,3651948,1757016740,-8.793975598499975,21561.864362716675,27073,11.87
+cda-server-2,False,21604.379405260086,"{'sample_time_ms': 42319.686, 'num_steps_trained': 670800, 'grad_time_ms': 365.936, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 145.839111328125, 'policy_loss': -0.1504814624786377, 'vf_explained_var': 0.07926931977272034, 'entropy': 7.459970951080322, 'cur_lr': 4.999999873689376e-05, 'total_loss': 145.7266845703125, 'kl': 0.011130633763968945}, 'load_time_ms': 0.682, 'num_steps_sampled': 670800, 'update_time_ms': 2.607}",559,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.515042543411255,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,670800,670800,{},559,107,-56.354664746722065,2025-09-04_22-13-02,8.000833928321104,3651948,1757016782,-8.075514133723603,21604.379405260086,27180,11.317757009345794
+cda-server-2,False,21647.21959042549,"{'sample_time_ms': 42357.983, 'num_steps_trained': 672000, 'grad_time_ms': 367.547, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 159.48631286621094, 'policy_loss': -0.13445042073726654, 'vf_explained_var': 0.09145782142877579, 'entropy': 7.627590656280518, 'cur_lr': 4.999999873689376e-05, 'total_loss': 159.3946990966797, 'kl': 0.012530959211289883}, 'load_time_ms': 0.681, 'num_steps_sampled': 672000, 'update_time_ms': 2.588}",560,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.84018516540527,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,672000,672000,{},560,115,-66.33505633171917,2025-09-04_22-13-45,8.00009120135479,3651948,1757016825,-6.0452420411654595,21647.21959042549,27295,10.173913043478262
+cda-server-2,False,21691.032320976257,"{'sample_time_ms': 42480.611, 'num_steps_trained': 673200, 'grad_time_ms': 370.136, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 120.06874084472656, 'policy_loss': -0.1362529695034027, 'vf_explained_var': 0.07216636091470718, 'entropy': 6.824906349182129, 'cur_lr': 4.999999873689376e-05, 'total_loss': 119.97330474853516, 'kl': 0.011949594132602215}, 'load_time_ms': 0.682, 'num_steps_sampled': 673200, 'update_time_ms': 2.609}",561,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.81273055076599,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,673200,673200,{},561,129,-52.10596212722329,2025-09-04_22-14-29,8.00011735640512,3651948,1757016869,-5.093333995757722,21691.032320976257,27424,9.527131782945736
+cda-server-2,False,21733.87010025978,"{'sample_time_ms': 42543.074, 'num_steps_trained': 674400, 'grad_time_ms': 369.86, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 105.38618469238281, 'policy_loss': -0.1297050267457962, 'vf_explained_var': 0.0827423632144928, 'entropy': 6.8894147872924805, 'cur_lr': 4.999999873689376e-05, 'total_loss': 105.29486083984375, 'kl': 0.011231918819248676}, 'load_time_ms': 0.684, 'num_steps_sampled': 674400, 'update_time_ms': 2.616}",562,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.83777928352356,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,674400,674400,{},562,119,-57.96373070942089,2025-09-04_22-15-12,8.000232362221062,3651948,1757016912,-6.140445976676297,21733.87010025978,27543,10.109243697478991
+cda-server-2,False,21776.769562005997,"{'sample_time_ms': 42484.513, 'num_steps_trained': 675600, 'grad_time_ms': 368.693, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 204.86996459960938, 'policy_loss': -0.13395991921424866, 'vf_explained_var': 0.0822979137301445, 'entropy': 7.190834999084473, 'cur_lr': 4.999999873689376e-05, 'total_loss': 204.776611328125, 'kl': 0.011884557083249092}, 'load_time_ms': 0.696, 'num_steps_sampled': 675600, 'update_time_ms': 2.574}",563,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.89946174621582,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,675600,675600,{},563,117,-81.75435628911879,2025-09-04_22-15-55,8.000441655375283,3651948,1757016955,-6.406610327960005,21776.769562005997,27660,10.333333333333334
+cda-server-2,False,21820.96600151062,"{'sample_time_ms': 42575.677, 'num_steps_trained': 676800, 'grad_time_ms': 368.815, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 180.16827392578125, 'policy_loss': -0.1367030143737793, 'vf_explained_var': 0.07517794519662857, 'entropy': 7.484841346740723, 'cur_lr': 4.999999873689376e-05, 'total_loss': 180.07052612304688, 'kl': 0.011397531256079674}, 'load_time_ms': 0.68, 'num_steps_sampled': 676800, 'update_time_ms': 2.553}",564,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",44.19643950462341,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,676800,676800,{},564,112,-64.82634901042515,2025-09-04_22-16-39,8.000092940990312,3651948,1757016999,-6.589729884115301,21820.96600151062,27772,10.517857142857142
+cda-server-2,False,21863.813071250916,"{'sample_time_ms': 42544.47, 'num_steps_trained': 678000, 'grad_time_ms': 370.431, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 198.84449768066406, 'policy_loss': -0.14143936336040497, 'vf_explained_var': 0.06615026295185089, 'entropy': 6.939062118530273, 'cur_lr': 4.999999873689376e-05, 'total_loss': 198.74237060546875, 'kl': 0.011502007953822613}, 'load_time_ms': 0.675, 'num_steps_sampled': 678000, 'update_time_ms': 2.561}",565,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.84706974029541,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,678000,678000,{},565,109,-86.60146354034627,2025-09-04_22-17-22,8.000039436187995,3651948,1757017042,-7.526828884385483,21863.813071250916,27881,10.853211009174313
+cda-server-2,False,21906.536709070206,"{'sample_time_ms': 42546.89, 'num_steps_trained': 679200, 'grad_time_ms': 373.969, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 147.68374633789062, 'policy_loss': -0.155631884932518, 'vf_explained_var': 0.06244520843029022, 'entropy': 7.263120174407959, 'cur_lr': 4.999999873689376e-05, 'total_loss': 147.57229614257812, 'kl': 0.012926424853503704}, 'load_time_ms': 0.692, 'num_steps_sampled': 679200, 'update_time_ms': 2.558}",566,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.72363781929016,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,679200,679200,{},566,107,-54.63605971540724,2025-09-04_22-18-05,8.000003062458518,3651948,1757017085,-7.979791943836602,21906.536709070206,27988,11.317757009345794
+cda-server-2,False,21949.84335541725,"{'sample_time_ms': 42616.995, 'num_steps_trained': 680400, 'grad_time_ms': 372.824, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 155.88766479492188, 'policy_loss': -0.13775783777236938, 'vf_explained_var': 0.058890633285045624, 'entropy': 7.494391441345215, 'cur_lr': 4.999999873689376e-05, 'total_loss': 155.78729248046875, 'kl': 0.010931625962257385}, 'load_time_ms': 0.69, 'num_steps_sampled': 680400, 'update_time_ms': 2.549}",567,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.3066463470459,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,680400,680400,{},567,112,-84.02993638479828,2025-09-04_22-18-48,8.00024140639188,3651948,1757017128,-7.55994395399298,21949.84335541725,28100,10.928571428571429
+cda-server-2,False,21992.533579826355,"{'sample_time_ms': 42686.179, 'num_steps_trained': 681600, 'grad_time_ms': 372.55, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 152.74652099609375, 'policy_loss': -0.15492337942123413, 'vf_explained_var': 0.058588724583387375, 'entropy': 7.213345050811768, 'cur_lr': 4.999999873689376e-05, 'total_loss': 152.63485717773438, 'kl': 0.012657254002988338}, 'load_time_ms': 0.694, 'num_steps_sampled': 681600, 'update_time_ms': 2.546}",568,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.690224409103394,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,681600,681600,{},568,110,-50.905645282197014,2025-09-04_22-19-31,8.000048079203719,3651948,1757017171,-7.11440875099205,21992.533579826355,28210,10.745454545454546
+cda-server-2,False,22035.043923854828,"{'sample_time_ms': 42684.105, 'num_steps_trained': 682800, 'grad_time_ms': 374.156, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 135.70074462890625, 'policy_loss': -0.14947375655174255, 'vf_explained_var': 0.05739326775074005, 'entropy': 6.931785583496094, 'cur_lr': 4.999999873689376e-05, 'total_loss': 135.59986877441406, 'kl': 0.01421891525387764}, 'load_time_ms': 0.695, 'num_steps_sampled': 682800, 'update_time_ms': 2.539}",569,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.5103440284729,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,682800,682800,{},569,106,-62.53338617710244,2025-09-04_22-20-13,8.000107733981825,3651948,1757017213,-8.046965783119704,22035.043923854828,28316,11.339622641509434
+cda-server-2,False,22077.84760403633,"{'sample_time_ms': 42678.69, 'num_steps_trained': 684000, 'grad_time_ms': 375.862, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 112.93285369873047, 'policy_loss': -0.1449345052242279, 'vf_explained_var': 0.08518168330192566, 'entropy': 7.19413423538208, 'cur_lr': 4.999999873689376e-05, 'total_loss': 112.82575225830078, 'kl': 0.011071660555899143}, 'load_time_ms': 0.711, 'num_steps_sampled': 684000, 'update_time_ms': 2.536}",570,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.803680181503296,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,684000,684000,{},570,122,-48.24851714742445,2025-09-04_22-20-56,8.000055241613577,3651948,1757017256,-5.624446084612276,22077.84760403633,28438,9.918032786885245
+cda-server-2,False,22120.38676905632,"{'sample_time_ms': 42551.255, 'num_steps_trained': 685200, 'grad_time_ms': 375.928, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 133.19151306152344, 'policy_loss': -0.1502912938594818, 'vf_explained_var': 0.07893572002649307, 'entropy': 7.2277045249938965, 'cur_lr': 4.999999873689376e-05, 'total_loss': 133.07899475097656, 'kl': 0.011048024520277977}, 'load_time_ms': 0.722, 'num_steps_sampled': 685200, 'update_time_ms': 2.572}",571,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.539165019989014,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,685200,685200,{},571,113,-85.25417927993146,2025-09-04_22-21-39,8.000087952482096,3651948,1757017299,-7.091387358571522,22120.38676905632,28551,10.690265486725664
+cda-server-2,False,22162.90586090088,"{'sample_time_ms': 42517.234, 'num_steps_trained': 686400, 'grad_time_ms': 378.059, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 182.99456787109375, 'policy_loss': -0.13071544468402863, 'vf_explained_var': 0.07520709931850433, 'entropy': 7.410926818847656, 'cur_lr': 4.999999873689376e-05, 'total_loss': 182.89974975585938, 'kl': 0.010507463477551937}, 'load_time_ms': 0.725, 'num_steps_sampled': 686400, 'update_time_ms': 2.572}",572,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.519091844558716,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,686400,686400,{},572,115,-82.94677043031537,2025-09-04_22-22-21,8.000496942292274,3651948,1757017341,-6.136495311014474,22162.90586090088,28666,10.156521739130435
+cda-server-2,False,22205.807220220566,"{'sample_time_ms': 42518.564, 'num_steps_trained': 687600, 'grad_time_ms': 376.914, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 122.54331970214844, 'policy_loss': -0.13683006167411804, 'vf_explained_var': 0.0526929572224617, 'entropy': 7.163827896118164, 'cur_lr': 4.999999873689376e-05, 'total_loss': 122.44371032714844, 'kl': 0.010894465260207653}, 'load_time_ms': 0.711, 'num_steps_sampled': 687600, 'update_time_ms': 2.605}",573,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.90135931968689,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,687600,687600,{},573,122,-68.4526222955332,2025-09-04_22-23-04,10.0,3651948,1757017384,-5.682447918716967,22205.807220220566,28788,9.959016393442623
+cda-server-2,False,22248.536956310272,"{'sample_time_ms': 42370.332, 'num_steps_trained': 688800, 'grad_time_ms': 378.452, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 209.05628967285156, 'policy_loss': -0.1234748512506485, 'vf_explained_var': 0.06602538377046585, 'entropy': 7.409505367279053, 'cur_lr': 4.999999873689376e-05, 'total_loss': 208.97499084472656, 'kl': 0.012348907068371773}, 'load_time_ms': 0.709, 'num_steps_sampled': 688800, 'update_time_ms': 2.581}",574,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.72973608970642,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,688800,688800,{},574,109,-74.35816990238168,2025-09-04_22-23-47,8.00172516925269,3651948,1757017427,-7.325131015657343,22248.536956310272,28897,10.972477064220184
+cda-server-2,False,22291.571404218674,"{'sample_time_ms': 42390.463, 'num_steps_trained': 690000, 'grad_time_ms': 377.016, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 130.4335174560547, 'policy_loss': -0.1544542908668518, 'vf_explained_var': 0.06219371780753136, 'entropy': 6.913965225219727, 'cur_lr': 4.999999873689376e-05, 'total_loss': 130.3199005126953, 'kl': 0.011956276372075081}, 'load_time_ms': 0.722, 'num_steps_sampled': 690000, 'update_time_ms': 2.576}",575,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.03444790840149,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,690000,690000,{},575,113,-54.22765412618301,2025-09-04_22-24-30,8.000057655930751,3651948,1757017470,-7.287234470903005,22291.571404218674,29010,10.858407079646017
+cda-server-2,False,22335.940213918686,"{'sample_time_ms': 42558.047, 'num_steps_trained': 691200, 'grad_time_ms': 373.959, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 168.27389526367188, 'policy_loss': -0.13689130544662476, 'vf_explained_var': 0.07498934864997864, 'entropy': 7.524739742279053, 'cur_lr': 4.999999873689376e-05, 'total_loss': 168.1717071533203, 'kl': 0.010151694528758526}, 'load_time_ms': 0.708, 'num_steps_sampled': 691200, 'update_time_ms': 2.596}",576,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",44.36880970001221,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,691200,691200,{},576,110,-70.2475338971699,2025-09-04_22-25-14,8.00002636638029,3651948,1757017514,-6.757685599026747,22335.940213918686,29120,10.672727272727272
+cda-server-2,False,22378.950961351395,"{'sample_time_ms': 42525.88, 'num_steps_trained': 692400, 'grad_time_ms': 376.477, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 136.9923553466797, 'policy_loss': -0.140699565410614, 'vf_explained_var': 0.09320604801177979, 'entropy': 7.144749164581299, 'cur_lr': 4.999999873689376e-05, 'total_loss': 136.8886260986328, 'kl': 0.010824107564985752}, 'load_time_ms': 0.72, 'num_steps_sampled': 692400, 'update_time_ms': 2.585}",577,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.01074743270874,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,692400,692400,{},577,126,-54.93737929459731,2025-09-04_22-25-57,8.000693779503983,3651948,1757017557,-5.148831116193629,22378.950961351395,29246,9.642857142857142
+cda-server-2,False,22423.185261964798,"{'sample_time_ms': 42680.617, 'num_steps_trained': 693600, 'grad_time_ms': 376.063, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 174.09930419921875, 'policy_loss': -0.1358547955751419, 'vf_explained_var': 0.07100100070238113, 'entropy': 6.8559722900390625, 'cur_lr': 4.999999873689376e-05, 'total_loss': 174.0040283203125, 'kl': 0.01187346875667572}, 'load_time_ms': 0.721, 'num_steps_sampled': 693600, 'update_time_ms': 2.612}",578,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",44.23430061340332,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,693600,693600,{},578,118,-83.06991680825949,2025-09-04_22-26-42,8.00003230595356,3651948,1757017602,-6.179279727686898,22423.185261964798,29364,10.177966101694915
+cda-server-2,False,22466.4621822834,"{'sample_time_ms': 42756.586, 'num_steps_trained': 694800, 'grad_time_ms': 376.764, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 139.19488525390625, 'policy_loss': -0.13023453950881958, 'vf_explained_var': 0.07461045682430267, 'entropy': 6.996879577636719, 'cur_lr': 4.999999873689376e-05, 'total_loss': 139.10948181152344, 'kl': 0.013118831440806389}, 'load_time_ms': 0.716, 'num_steps_sampled': 694800, 'update_time_ms': 2.601}",579,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.276920318603516,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,694800,694800,{},579,121,-47.700735280103274,2025-09-04_22-27-25,8.000156903274824,3651948,1757017645,-4.957898628346339,22466.4621822834,29485,9.537190082644628
+cda-server-2,False,22509.432641267776,"{'sample_time_ms': 42776.12, 'num_steps_trained': 696000, 'grad_time_ms': 373.96, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 94.98158264160156, 'policy_loss': -0.1420704573392868, 'vf_explained_var': 0.07674945890903473, 'entropy': 7.213596820831299, 'cur_lr': 4.999999873689376e-05, 'total_loss': 94.87638092041016, 'kl': 0.010787763632833958}, 'load_time_ms': 0.707, 'num_steps_sampled': 696000, 'update_time_ms': 2.622}",580,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.970458984375,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,696000,696000,{},580,119,-66.57485710214135,2025-09-04_22-28-08,8.000062790663561,3651948,1757017688,-6.65762503572369,22509.432641267776,29604,10.504201680672269
+cda-server-2,False,22552.289803743362,"{'sample_time_ms': 42810.573, 'num_steps_trained': 697200, 'grad_time_ms': 371.38, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 128.8344268798828, 'policy_loss': -0.1312500238418579, 'vf_explained_var': 0.09103263169527054, 'entropy': 7.415947914123535, 'cur_lr': 4.999999873689376e-05, 'total_loss': 128.74192810058594, 'kl': 0.011338096112012863}, 'load_time_ms': 0.701, 'num_steps_sampled': 697200, 'update_time_ms': 2.526}",581,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.85716247558594,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,697200,697200,{},581,121,-57.68848140224077,2025-09-04_22-28-51,8.00019220691321,3651948,1757017731,-5.39508747106984,22552.289803743362,29725,9.84297520661157
+cda-server-2,False,22595.954249620438,"{'sample_time_ms': 42927.164, 'num_steps_trained': 698400, 'grad_time_ms': 369.282, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 122.69112396240234, 'policy_loss': -0.1308661699295044, 'vf_explained_var': 0.07427005469799042, 'entropy': 7.18864107131958, 'cur_lr': 4.999999873689376e-05, 'total_loss': 122.60426330566406, 'kl': 0.012876071035861969}, 'load_time_ms': 0.698, 'num_steps_sampled': 698400, 'update_time_ms': 2.557}",582,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.664445877075195,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,698400,698400,{},582,117,-57.31496480522384,2025-09-04_22-29-35,8.000219740887788,3651948,1757017775,-5.87037731655807,22595.954249620438,29842,10.136752136752136
+cda-server-2,False,22638.80445575714,"{'sample_time_ms': 42919.357, 'num_steps_trained': 699600, 'grad_time_ms': 371.931, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 115.34535217285156, 'policy_loss': -0.12993019819259644, 'vf_explained_var': 0.09734512865543365, 'entropy': 7.098825454711914, 'cur_lr': 4.999999873689376e-05, 'total_loss': 115.25001525878906, 'kl': 0.01012202724814415}, 'load_time_ms': 0.712, 'num_steps_sampled': 699600, 'update_time_ms': 2.582}",583,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.85020613670349,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,699600,699600,{},583,128,-56.91603174569671,2025-09-04_22-30-17,8.000145350968783,3651948,1757017817,-4.829231435358762,22638.80445575714,29970,9.515625
+cda-server-2,False,22684.14041852951,"{'sample_time_ms': 43181.283, 'num_steps_trained': 700800, 'grad_time_ms': 370.648, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 133.59213256835938, 'policy_loss': -0.14230309426784515, 'vf_explained_var': 0.06981848180294037, 'entropy': 6.67290735244751, 'cur_lr': 4.999999873689376e-05, 'total_loss': 133.49095153808594, 'kl': 0.01203584298491478}, 'load_time_ms': 0.725, 'num_steps_sampled': 700800, 'update_time_ms': 2.588}",584,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",45.335962772369385,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,700800,700800,{},584,123,-57.59628058778699,2025-09-04_22-31-03,8.000117909117819,3651948,1757017863,-5.26294073240756,22684.14041852951,30093,9.691056910569106
+cda-server-2,False,22727.164251089096,"{'sample_time_ms': 43179.759, 'num_steps_trained': 702000, 'grad_time_ms': 371.127, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 142.23223876953125, 'policy_loss': -0.12671436369419098, 'vf_explained_var': 0.08032892644405365, 'entropy': 7.343288898468018, 'cur_lr': 4.999999873689376e-05, 'total_loss': 142.15101623535156, 'kl': 0.01330479048192501}, 'load_time_ms': 0.717, 'num_steps_sampled': 702000, 'update_time_ms': 2.585}",585,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.02383255958557,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,702000,702000,{},585,119,-63.36495358766278,2025-09-04_22-31-46,8.000246534700047,3651948,1757017906,-5.878659310265259,22727.164251089096,30212,10.109243697478991
+cda-server-2,False,22770.384961128235,"{'sample_time_ms': 43063.488, 'num_steps_trained': 703200, 'grad_time_ms': 372.602, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 91.35843658447266, 'policy_loss': -0.15350015461444855, 'vf_explained_var': 0.1072445660829544, 'entropy': 7.050044536590576, 'cur_lr': 4.999999873689376e-05, 'total_loss': 91.24449157714844, 'kl': 0.011577222496271133}, 'load_time_ms': 0.715, 'num_steps_sampled': 703200, 'update_time_ms': 2.567}",586,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.220710039138794,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,703200,703200,{},586,125,-47.96183202814019,2025-09-04_22-32-29,8.000505277038918,3651948,1757017949,-5.040616298416622,22770.384961128235,30337,9.56
+cda-server-2,False,22814.914575338364,"{'sample_time_ms': 43215.761, 'num_steps_trained': 704400, 'grad_time_ms': 372.286, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 93.92406463623047, 'policy_loss': -0.12293097376823425, 'vf_explained_var': 0.12784144282341003, 'entropy': 6.780979156494141, 'cur_lr': 4.999999873689376e-05, 'total_loss': 93.8482666015625, 'kl': 0.013793894089758396}, 'load_time_ms': 0.705, 'num_steps_sampled': 704400, 'update_time_ms': 2.583}",587,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",44.529614210128784,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,704400,704400,{},587,120,-42.88386129363812,2025-09-04_22-33-14,8.000040180223671,3651948,1757017994,-5.753513069942545,22814.914575338364,30457,9.966666666666667
+cda-server-2,False,22859.455120801926,"{'sample_time_ms': 43246.856, 'num_steps_trained': 705600, 'grad_time_ms': 371.89, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 77.65475463867188, 'policy_loss': -0.13246053457260132, 'vf_explained_var': 0.09905927628278732, 'entropy': 6.938627243041992, 'cur_lr': 4.999999873689376e-05, 'total_loss': 77.56002044677734, 'kl': 0.01103940699249506}, 'load_time_ms': 0.7, 'num_steps_sampled': 705600, 'update_time_ms': 2.552}",588,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",44.54054546356201,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,705600,705600,{},588,149,-44.73748164195726,2025-09-04_22-33-58,8.00008797216531,3651948,1757018038,-2.6113752468583518,22859.455120801926,30606,8.12751677852349
+cda-server-2,False,22903.96373772621,"{'sample_time_ms': 43369.467, 'num_steps_trained': 706800, 'grad_time_ms': 372.391, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 121.15181732177734, 'policy_loss': -0.1183452308177948, 'vf_explained_var': 0.07469463348388672, 'entropy': 6.563516139984131, 'cur_lr': 4.999999873689376e-05, 'total_loss': 121.072265625, 'kl': 0.011357057839632034}, 'load_time_ms': 0.716, 'num_steps_sampled': 706800, 'update_time_ms': 2.517}",589,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",44.50861692428589,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,706800,706800,{},589,135,-71.19111453046314,2025-09-04_22-34-43,8.000053933585503,3651948,1757018083,-4.193839701913829,22903.96373772621,30741,8.992592592592592
+cda-server-2,False,22947.06484746933,"{'sample_time_ms': 43382.958, 'num_steps_trained': 708000, 'grad_time_ms': 371.98, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 141.84280395507812, 'policy_loss': -0.12761473655700684, 'vf_explained_var': 0.0985998809337616, 'entropy': 7.402409076690674, 'cur_lr': 4.999999873689376e-05, 'total_loss': 141.76817321777344, 'kl': 0.01550805103033781}, 'load_time_ms': 0.707, 'num_steps_sampled': 708000, 'update_time_ms': 2.512}",590,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.101109743118286,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,708000,708000,{},590,121,-71.80392788298023,2025-09-04_22-35-26,8.000476339571728,3651948,1757018126,-5.264783734877008,22947.06484746933,30862,9.801652892561984
+cda-server-2,False,22990.538947582245,"{'sample_time_ms': 43443.705, 'num_steps_trained': 709200, 'grad_time_ms': 372.915, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 119.58562469482422, 'policy_loss': -0.14314739406108856, 'vf_explained_var': 0.08791525661945343, 'entropy': 7.169406414031982, 'cur_lr': 4.999999873689376e-05, 'total_loss': 119.47736358642578, 'kl': 0.010210275650024414}, 'load_time_ms': 0.701, 'num_steps_sampled': 709200, 'update_time_ms': 2.575}",591,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.47410011291504,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,709200,709200,{},591,115,-57.33659681872386,2025-09-04_22-36-09,8.000032141378654,3651948,1757018169,-6.406518425550268,22990.538947582245,30977,10.417391304347825
+cda-server-2,False,23034.84391260147,"{'sample_time_ms': 43507.588, 'num_steps_trained': 710400, 'grad_time_ms': 373.119, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 151.1319122314453, 'policy_loss': -0.13063140213489532, 'vf_explained_var': 0.04749082773923874, 'entropy': 7.191324234008789, 'cur_lr': 4.999999873689376e-05, 'total_loss': 151.0413360595703, 'kl': 0.011724242940545082}, 'load_time_ms': 0.703, 'num_steps_sampled': 710400, 'update_time_ms': 2.528}",592,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",44.304965019226074,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,710400,710400,{},592,132,-48.77108419299691,2025-09-04_22-36-54,8.000195820089234,3651948,1757018214,-3.725944518014065,23034.84391260147,31109,8.856060606060606
+cda-server-2,False,23080.382224321365,"{'sample_time_ms': 43777.358, 'num_steps_trained': 711600, 'grad_time_ms': 372.202, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 107.67868041992188, 'policy_loss': -0.138823002576828, 'vf_explained_var': 0.07654394954442978, 'entropy': 6.84686279296875, 'cur_lr': 4.999999873689376e-05, 'total_loss': 107.58744812011719, 'kl': 0.013926461338996887}, 'load_time_ms': 0.71, 'num_steps_sampled': 711600, 'update_time_ms': 2.488}",593,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",45.53831171989441,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,711600,711600,{},593,133,-66.42029840822644,2025-09-04_22-37-39,8.000138517453546,3651948,1757018259,-4.4259443809062144,23080.382224321365,31242,9.210526315789474
+cda-server-2,False,23125.05748963356,"{'sample_time_ms': 43711.507, 'num_steps_trained': 712800, 'grad_time_ms': 371.969, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 156.84869384765625, 'policy_loss': -0.12441955506801605, 'vf_explained_var': 0.08332864940166473, 'entropy': 6.674763202667236, 'cur_lr': 4.999999873689376e-05, 'total_loss': 156.7620086669922, 'kl': 0.011046051979064941}, 'load_time_ms': 0.707, 'num_steps_sampled': 712800, 'update_time_ms': 2.481}",594,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",44.675265312194824,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,712800,712800,{},594,125,-68.77151196250325,2025-09-04_22-38-24,8.001414850232228,3651948,1757018304,-4.839392487965561,23125.05748963356,31367,9.4
+cda-server-2,False,23168.927923202515,"{'sample_time_ms': 43795.331, 'num_steps_trained': 714000, 'grad_time_ms': 372.795, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 118.53201293945312, 'policy_loss': -0.12804369628429413, 'vf_explained_var': 0.0854819267988205, 'entropy': 6.8159966468811035, 'cur_lr': 4.999999873689376e-05, 'total_loss': 118.44461822509766, 'kl': 0.01189707312732935}, 'load_time_ms': 0.715, 'num_steps_sampled': 714000, 'update_time_ms': 2.454}",595,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.87043356895447,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,714000,714000,{},595,150,-84.89429218265892,2025-09-04_22-39-08,8.000200131851521,3651948,1757018348,-2.795488517206502,23168.927923202515,31517,8.213333333333333
+cda-server-2,False,23212.9437186718,"{'sample_time_ms': 43874.525, 'num_steps_trained': 715200, 'grad_time_ms': 373.034, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 89.91404724121094, 'policy_loss': -0.13218335807323456, 'vf_explained_var': 0.1509593278169632, 'entropy': 6.593450546264648, 'cur_lr': 4.999999873689376e-05, 'total_loss': 89.82585906982422, 'kl': 0.012874918058514595}, 'load_time_ms': 0.723, 'num_steps_sampled': 715200, 'update_time_ms': 2.448}",596,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",44.01579546928406,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,715200,715200,{},596,146,-46.89106432634683,2025-09-04_22-39-52,8.000059176342543,3651948,1757018392,-2.718115021179101,23212.9437186718,31663,8.184931506849315
+cda-server-2,False,23256.91022491455,"{'sample_time_ms': 43817.494, 'num_steps_trained': 716400, 'grad_time_ms': 373.682, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 131.62042236328125, 'policy_loss': -0.12891115248203278, 'vf_explained_var': 0.0633602887392044, 'entropy': 6.955478191375732, 'cur_lr': 4.999999873689376e-05, 'total_loss': 131.52967834472656, 'kl': 0.011169587261974812}, 'load_time_ms': 0.73, 'num_steps_sampled': 716400, 'update_time_ms': 2.46}",597,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.966506242752075,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,716400,716400,{},597,133,-60.22632302654492,2025-09-04_22-40-36,8.000883760389728,3651948,1757018436,-4.196563868691215,23256.91022491455,31796,9.090225563909774
+cda-server-2,False,23301.675994873047,"{'sample_time_ms': 43840.693, 'num_steps_trained': 717600, 'grad_time_ms': 372.935, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 108.18599700927734, 'policy_loss': -0.118685282766819, 'vf_explained_var': 0.11735150218009949, 'entropy': 6.502457141876221, 'cur_lr': 4.999999873689376e-05, 'total_loss': 108.11123657226562, 'kl': 0.012854685075581074}, 'load_time_ms': 0.73, 'num_steps_sampled': 717600, 'update_time_ms': 2.517}",598,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",44.765769958496094,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,717600,717600,{},598,152,-67.82491122735355,2025-09-04_22-41-21,8.00003680404728,3651948,1757018481,-2.197598254975347,23301.675994873047,31948,7.848684210526316
+cda-server-2,False,23344.57385659218,"{'sample_time_ms': 43682.56, 'num_steps_trained': 718800, 'grad_time_ms': 370.037, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 133.1768798828125, 'policy_loss': -0.13704553246498108, 'vf_explained_var': 0.09473087638616562, 'entropy': 6.754085063934326, 'cur_lr': 4.999999873689376e-05, 'total_loss': 133.0796356201172, 'kl': 0.011645477265119553}, 'load_time_ms': 0.722, 'num_steps_sampled': 718800, 'update_time_ms': 2.56}",599,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.89786171913147,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,718800,718800,{},599,120,-63.29919197088866,2025-09-04_22-42-04,8.000100690883038,3651948,1757018524,-5.870295952205978,23344.57385659218,32068,10.083333333333334
+cda-server-2,False,23387.79086279869,"{'sample_time_ms': 43693.575, 'num_steps_trained': 720000, 'grad_time_ms': 370.578, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 89.28089141845703, 'policy_loss': -0.14922183752059937, 'vf_explained_var': 0.11266271024942398, 'entropy': 6.479259014129639, 'cur_lr': 4.999999873689376e-05, 'total_loss': 89.17571258544922, 'kl': 0.012887951917946339}, 'load_time_ms': 0.722, 'num_steps_sampled': 720000, 'update_time_ms': 2.536}",600,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.21700620651245,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,720000,720000,{},600,129,-47.30832296448869,2025-09-04_22-42-47,8.00003757225402,3651948,1757018567,-4.727779757085776,23387.79086279869,32197,9.294573643410853
+cda-server-2,False,23431.2193338871,"{'sample_time_ms': 43686.949, 'num_steps_trained': 721200, 'grad_time_ms': 372.533, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 95.62168884277344, 'policy_loss': -0.12089570611715317, 'vf_explained_var': 0.09228457510471344, 'entropy': 6.7667927742004395, 'cur_lr': 4.999999873689376e-05, 'total_loss': 95.5384521484375, 'kl': 0.01102022361010313}, 'load_time_ms': 0.723, 'num_steps_sampled': 721200, 'update_time_ms': 2.559}",601,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.428471088409424,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,721200,721200,{},601,138,-57.95461426237044,2025-09-04_22-43-30,8.000672331053476,3651948,1757018610,-3.609454058582472,23431.2193338871,32335,8.72463768115942
+cda-server-2,False,23474.939210653305,"{'sample_time_ms': 43626.161, 'num_steps_trained': 722400, 'grad_time_ms': 374.735, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 74.19865417480469, 'policy_loss': -0.12589259445667267, 'vf_explained_var': 0.11768775433301926, 'entropy': 5.9769673347473145, 'cur_lr': 4.999999873689376e-05, 'total_loss': 74.11507415771484, 'kl': 0.01238187775015831}, 'load_time_ms': 0.727, 'num_steps_sampled': 722400, 'update_time_ms': 2.576}",602,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.719876766204834,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,722400,722400,{},602,155,-38.30536829261998,2025-09-04_22-44-14,8.000132867335541,3651948,1757018654,-1.9368118449596428,23474.939210653305,32490,7.658064516129032
+cda-server-2,False,23519.12417769432,"{'sample_time_ms': 43490.588, 'num_steps_trained': 723600, 'grad_time_ms': 374.963, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 83.46903991699219, 'policy_loss': -0.1172410100698471, 'vf_explained_var': 0.1085447371006012, 'entropy': 6.539908409118652, 'cur_lr': 4.999999873689376e-05, 'total_loss': 83.40992736816406, 'kl': 0.017011698335409164}, 'load_time_ms': 0.713, 'num_steps_sampled': 723600, 'update_time_ms': 2.587}",603,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",44.184967041015625,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,723600,723600,{},603,139,-45.90818513752913,2025-09-04_22-44-58,8.000326986938695,3651948,1757018698,-3.7029299404763436,23519.12417769432,32629,8.762589928057555
+cda-server-2,False,23562.857536554337,"{'sample_time_ms': 43395.735, 'num_steps_trained': 724800, 'grad_time_ms': 375.529, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 103.1093978881836, 'policy_loss': -0.1405450701713562, 'vf_explained_var': 0.11952368170022964, 'entropy': 6.960071086883545, 'cur_lr': 4.999999873689376e-05, 'total_loss': 103.00732421875, 'kl': 0.011259738355875015}, 'load_time_ms': 0.704, 'num_steps_sampled': 724800, 'update_time_ms': 2.648}",604,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.73335886001587,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,724800,724800,{},604,128,-47.65175200995771,2025-09-04_22-45-42,8.000083883919991,3651948,1757018742,-4.491569973848575,23562.857536554337,32757,9.2578125
+cda-server-2,False,23606.357277154922,"{'sample_time_ms': 43361.803, 'num_steps_trained': 726000, 'grad_time_ms': 372.361, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 90.22853088378906, 'policy_loss': -0.12851108610630035, 'vf_explained_var': 0.12002600729465485, 'entropy': 6.432928085327148, 'cur_lr': 4.999999873689376e-05, 'total_loss': 90.14180755615234, 'kl': 0.012228470295667648}, 'load_time_ms': 0.696, 'num_steps_sampled': 726000, 'update_time_ms': 2.712}",605,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.49974060058594,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,726000,726000,{},605,132,-53.97147874809213,2025-09-04_22-46-25,8.000298934506844,3651948,1757018785,-4.387812240105299,23606.357277154922,32889,9.151515151515152
+cda-server-2,False,23650.741208314896,"{'sample_time_ms': 43399.828, 'num_steps_trained': 727200, 'grad_time_ms': 371.198, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 100.70307922363281, 'policy_loss': -0.14291277527809143, 'vf_explained_var': 0.10846755653619766, 'entropy': 6.764704704284668, 'cur_lr': 4.999999873689376e-05, 'total_loss': 100.59479522705078, 'kl': 0.01013493537902832}, 'load_time_ms': 0.691, 'num_steps_sampled': 727200, 'update_time_ms': 2.721}",606,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",44.383931159973145,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,727200,727200,{},606,148,-53.87825800993699,2025-09-04_22-47-10,8.001652829584234,3651948,1757018830,-2.2901905647498775,23650.741208314896,33037,7.9324324324324325
+cda-server-2,False,23695.062483549118,"{'sample_time_ms': 43436.722, 'num_steps_trained': 728400, 'grad_time_ms': 369.762, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 62.03611755371094, 'policy_loss': -0.12948085367679596, 'vf_explained_var': 0.12581761181354523, 'entropy': 6.543361663818359, 'cur_lr': 4.999999873689376e-05, 'total_loss': 61.94514465332031, 'kl': 0.011268743313848972}, 'load_time_ms': 0.695, 'num_steps_sampled': 728400, 'update_time_ms': 2.728}",607,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",44.32127523422241,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,728400,728400,{},607,155,-50.7471301644085,2025-09-04_22-47-54,10.0,3651948,1757018874,-2.3390846226268893,23695.062483549118,33192,7.941935483870968
+cda-server-2,False,23738.654263973236,"{'sample_time_ms': 43317.573, 'num_steps_trained': 729600, 'grad_time_ms': 371.483, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 99.25454711914062, 'policy_loss': -0.12160660326480865, 'vf_explained_var': 0.1230609118938446, 'entropy': 6.74321174621582, 'cur_lr': 4.999999873689376e-05, 'total_loss': 99.17286682128906, 'kl': 0.011683410033583641}, 'load_time_ms': 0.704, 'num_steps_sampled': 729600, 'update_time_ms': 2.714}",608,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.59178042411804,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,729600,729600,{},608,139,-48.03150689939424,2025-09-04_22-48-38,8.00045191681889,3651948,1757018918,-2.999305298071618,23738.654263973236,33331,8.388489208633093
+cda-server-2,False,23782.10429239273,"{'sample_time_ms': 43369.897, 'num_steps_trained': 730800, 'grad_time_ms': 374.336, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 75.23072052001953, 'policy_loss': -0.12343779951334, 'vf_explained_var': 0.12303393334150314, 'entropy': 6.567841529846191, 'cur_lr': 4.999999873689376e-05, 'total_loss': 75.13880920410156, 'kl': 0.009223658591508865}, 'load_time_ms': 0.702, 'num_steps_sampled': 730800, 'update_time_ms': 2.714}",609,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.45002841949463,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,730800,730800,{},609,148,-52.06682091221565,2025-09-04_22-49-21,8.001783886918624,3651948,1757018961,-2.7639513994668614,23782.10429239273,33479,8.243243243243244
+cda-server-2,False,23825.765317440033,"{'sample_time_ms': 43413.4, 'num_steps_trained': 732000, 'grad_time_ms': 375.169, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 103.58061981201172, 'policy_loss': -0.12942402064800262, 'vf_explained_var': 0.09735243767499924, 'entropy': 6.212673664093018, 'cur_lr': 4.999999873689376e-05, 'total_loss': 103.48749542236328, 'kl': 0.010623730719089508}, 'load_time_ms': 0.703, 'num_steps_sampled': 732000, 'update_time_ms': 2.789}",610,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.661025047302246,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,732000,732000,{},610,143,-62.01605145781362,2025-09-04_22-50-05,8.000033550722925,3651948,1757019005,-2.9132776826894746,23825.765317440033,33622,8.286713286713287
+cda-server-2,False,23870.447543382645,"{'sample_time_ms': 43539.937, 'num_steps_trained': 733200, 'grad_time_ms': 374.066, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 87.480712890625, 'policy_loss': -0.1381773203611374, 'vf_explained_var': 0.13044473528862, 'entropy': 6.746407985687256, 'cur_lr': 4.999999873689376e-05, 'total_loss': 87.3785171508789, 'kl': 0.010530880652368069}, 'load_time_ms': 0.7, 'num_steps_sampled': 733200, 'update_time_ms': 2.733}",611,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",44.682225942611694,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,733200,733200,{},611,156,-47.74921806817137,2025-09-04_22-50-50,8.0000941196506,3651948,1757019050,-2.0150252691354487,23870.447543382645,33778,7.8076923076923075
+cda-server-2,False,23913.03826022148,"{'sample_time_ms': 43428.226, 'num_steps_trained': 734400, 'grad_time_ms': 372.778, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 115.05916595458984, 'policy_loss': -0.13818299770355225, 'vf_explained_var': 0.08191430568695068, 'entropy': 6.76662540435791, 'cur_lr': 4.999999873689376e-05, 'total_loss': 114.96686553955078, 'kl': 0.013425699435174465}, 'load_time_ms': 0.691, 'num_steps_sampled': 734400, 'update_time_ms': 2.8}",612,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",42.59071683883667,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,734400,734400,{},612,115,-49.67916648419274,2025-09-04_22-51-32,8.000300123651812,3651948,1757019092,-6.248643446342032,23913.03826022148,33893,10.339130434782609
+cda-server-2,False,23957.83992266655,"{'sample_time_ms': 43489.08, 'num_steps_trained': 735600, 'grad_time_ms': 373.518, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 80.71265411376953, 'policy_loss': -0.13465330004692078, 'vf_explained_var': 0.11793039739131927, 'entropy': 6.302291393280029, 'cur_lr': 4.999999873689376e-05, 'total_loss': 80.63780212402344, 'kl': 0.017498981207609177}, 'load_time_ms': 0.701, 'num_steps_sampled': 735600, 'update_time_ms': 2.832}",613,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",44.80166244506836,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,735600,735600,{},613,152,-43.75999672188685,2025-09-04_22-52-17,8.00014029262937,3651948,1757019137,-2.481013239972887,23957.83992266655,34045,7.980263157894737
+cda-server-2,False,24001.833251714706,"{'sample_time_ms': 43512.734, 'num_steps_trained': 736800, 'grad_time_ms': 375.891, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 87.89940643310547, 'policy_loss': -0.1337495893239975, 'vf_explained_var': 0.1053292378783226, 'entropy': 6.355923652648926, 'cur_lr': 4.999999873689376e-05, 'total_loss': 87.80087280273438, 'kl': 0.010308354161679745}, 'load_time_ms': 0.715, 'num_steps_sampled': 736800, 'update_time_ms': 2.79}",614,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.99332904815674,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,736800,736800,{},614,151,-52.692293882623645,2025-09-04_22-53-01,8.00005365016508,3651948,1757019181,-2.3395078662194324,24001.833251714706,34196,7.940397350993377
+cda-server-2,False,24045.63971400261,"{'sample_time_ms': 43541.909, 'num_steps_trained': 738000, 'grad_time_ms': 377.403, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 59.33376693725586, 'policy_loss': -0.11895520240068436, 'vf_explained_var': 0.13744482398033142, 'entropy': 6.331945419311523, 'cur_lr': 4.999999873689376e-05, 'total_loss': 59.25489807128906, 'kl': 0.011729689314961433}, 'load_time_ms': 0.713, 'num_steps_sampled': 738000, 'update_time_ms': 2.736}",615,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.80646228790283,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,738000,738000,{},615,154,-48.34869160915936,2025-09-04_22-53-45,8.000059264052199,3651948,1757019225,-2.210588445461337,24045.63971400261,34350,7.8311688311688314
+cda-server-2,False,24089.56317305565,"{'sample_time_ms': 43494.818, 'num_steps_trained': 739200, 'grad_time_ms': 378.397, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 80.76864624023438, 'policy_loss': -0.13747276365756989, 'vf_explained_var': 0.1064247190952301, 'entropy': 6.598756790161133, 'cur_lr': 4.999999873689376e-05, 'total_loss': 80.67037963867188, 'kl': 0.011474408209323883}, 'load_time_ms': 0.72, 'num_steps_sampled': 739200, 'update_time_ms': 2.786}",616,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.92345905303955,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,739200,739200,{},616,143,-43.274968244869726,2025-09-04_22-54-29,8.00034231418727,3651948,1757019269,-2.892525819712943,24089.56317305565,34493,8.314685314685315
+cda-server-2,False,24132.587853193283,"{'sample_time_ms': 43367.12, 'num_steps_trained': 740400, 'grad_time_ms': 376.518, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 196.58050537109375, 'policy_loss': -0.12371982634067535, 'vf_explained_var': 0.08463575690984726, 'entropy': 7.091447353363037, 'cur_lr': 4.999999873689376e-05, 'total_loss': 196.49053955078125, 'kl': 0.009876329451799393}, 'load_time_ms': 0.702, 'num_steps_sampled': 740400, 'update_time_ms': 2.776}",617,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.02468013763428,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,740400,740400,{},617,128,-84.37941586798468,2025-09-04_22-55-12,8.000285617819653,3651948,1757019312,-4.624479288788307,24132.587853193283,34621,9.2890625
+cda-server-2,False,24176.85699081421,"{'sample_time_ms': 43434.809, 'num_steps_trained': 741600, 'grad_time_ms': 376.645, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 80.58283233642578, 'policy_loss': -0.12967443466186523, 'vf_explained_var': 0.10440634936094284, 'entropy': 6.300535202026367, 'cur_lr': 4.999999873689376e-05, 'total_loss': 80.50260925292969, 'kl': 0.014469693414866924}, 'load_time_ms': 0.699, 'num_steps_sampled': 741600, 'update_time_ms': 2.738}",618,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",44.2691376209259,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,741600,741600,{},618,149,-41.537811579281005,2025-09-04_22-55-56,8.000343860107382,3651948,1757019356,-2.679667372974014,24176.85699081421,34770,8.167785234899329
+cda-server-2,False,24221.655776262283,"{'sample_time_ms': 43571.083, 'num_steps_trained': 742800, 'grad_time_ms': 375.321, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 75.33663940429688, 'policy_loss': -0.1256122887134552, 'vf_explained_var': 0.1342889964580536, 'entropy': 6.3217339515686035, 'cur_lr': 4.999999873689376e-05, 'total_loss': 75.2529296875, 'kl': 0.012261205352842808}, 'load_time_ms': 0.693, 'num_steps_sampled': 742800, 'update_time_ms': 2.705}",619,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",44.79878544807434,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,742800,742800,{},619,156,-44.620420730516805,2025-09-04_22-56-41,8.000067229446792,3651948,1757019401,-1.8540265913132556,24221.655776262283,34926,7.673076923076923
+cda-server-2,False,24264.91754412651,"{'sample_time_ms': 43533.036, 'num_steps_trained': 744000, 'grad_time_ms': 373.499, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 95.90986633300781, 'policy_loss': -0.13190573453903198, 'vf_explained_var': 0.09556801617145538, 'entropy': 6.6097893714904785, 'cur_lr': 4.999999873689376e-05, 'total_loss': 95.8134765625, 'kl': 0.010394017212092876}, 'load_time_ms': 0.703, 'num_steps_sampled': 744000, 'update_time_ms': 2.633}",620,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.261767864227295,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,744000,744000,{},620,144,-80.9832114175946,2025-09-04_22-57-24,8.001205774148408,3651948,1757019444,-2.9950229317180774,24264.91754412651,35070,8.32638888888889
+cda-server-2,False,24309.009521722794,"{'sample_time_ms': 43475.999, 'num_steps_trained': 745200, 'grad_time_ms': 371.539, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 69.21296691894531, 'policy_loss': -0.1333545297384262, 'vf_explained_var': 0.11814220994710922, 'entropy': 6.632723331451416, 'cur_lr': 4.999999873689376e-05, 'total_loss': 69.12251281738281, 'kl': 0.01255726721137762}, 'load_time_ms': 0.702, 'num_steps_sampled': 745200, 'update_time_ms': 2.656}",621,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",44.09197759628296,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,745200,745200,{},621,156,-41.969145148756176,2025-09-04_22-58-09,8.000122929567713,3651948,1757019489,-1.945971003510623,24309.009521722794,35226,7.743589743589744
+cda-server-2,False,24352.62490963936,"{'sample_time_ms': 43579.795, 'num_steps_trained': 746400, 'grad_time_ms': 370.359, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 147.01626586914062, 'policy_loss': -0.12787127494812012, 'vf_explained_var': 0.12394154071807861, 'entropy': 6.46849250793457, 'cur_lr': 4.999999873689376e-05, 'total_loss': 146.9237518310547, 'kl': 0.010346510447561741}, 'load_time_ms': 0.705, 'num_steps_sampled': 746400, 'update_time_ms': 2.586}",622,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.61538791656494,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,746400,746400,{},622,146,-82.50721364693268,2025-09-04_22-58-52,8.000054162007205,3651948,1757019532,-2.8609263828961082,24352.62490963936,35372,8.198630136986301
+cda-server-2,False,24396.891786575317,"{'sample_time_ms': 43528.037, 'num_steps_trained': 747600, 'grad_time_ms': 368.726, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 128.25091552734375, 'policy_loss': -0.11456486582756042, 'vf_explained_var': 0.11836099624633789, 'entropy': 6.185139179229736, 'cur_lr': 4.999999873689376e-05, 'total_loss': 128.16693115234375, 'kl': 0.008946657180786133}, 'load_time_ms': 0.695, 'num_steps_sampled': 747600, 'update_time_ms': 2.532}",623,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",44.26687693595886,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,747600,747600,{},623,155,-85.04620785738832,2025-09-04_22-59-36,8.000154097887364,3651948,1757019576,-2.152161328113019,24396.891786575317,35527,7.754838709677419
+cda-server-2,False,24442.364223718643,"{'sample_time_ms': 43679.584, 'num_steps_trained': 748800, 'grad_time_ms': 365.217, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 87.53225708007812, 'policy_loss': -0.12074670195579529, 'vf_explained_var': 0.1277458518743515, 'entropy': 6.682364463806152, 'cur_lr': 4.999999873689376e-05, 'total_loss': 87.46076965332031, 'kl': 0.014415502548217773}, 'load_time_ms': 0.676, 'num_steps_sampled': 748800, 'update_time_ms': 2.503}",624,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",45.472437143325806,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,748800,748800,{},624,157,-46.17701575138811,2025-09-04_23-00-22,8.000141397449267,3651948,1757019622,-1.8435011198408267,24442.364223718643,35684,7.687898089171974
+cda-server-2,False,24487.214854002,"{'sample_time_ms': 43785.715, 'num_steps_trained': 750000, 'grad_time_ms': 363.547, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 75.34528350830078, 'policy_loss': -0.1255345344543457, 'vf_explained_var': 0.13604828715324402, 'entropy': 6.4094929695129395, 'cur_lr': 4.999999873689376e-05, 'total_loss': 75.26229858398438, 'kl': 0.012451428920030594}, 'load_time_ms': 0.673, 'num_steps_sampled': 750000, 'update_time_ms': 2.51}",625,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",44.85063028335571,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,750000,750000,{},625,156,-40.759977403879,2025-09-04_23-01-07,8.000353468310337,3651948,1757019667,-1.7018728022929461,24487.214854002,35840,7.596153846153846
+cda-server-2,False,24532.998854875565,"{'sample_time_ms': 43972.231, 'num_steps_trained': 751200, 'grad_time_ms': 363.122, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 101.50741577148438, 'policy_loss': -0.12551593780517578, 'vf_explained_var': 0.09942556172609329, 'entropy': 6.604660987854004, 'cur_lr': 4.999999873689376e-05, 'total_loss': 101.4166259765625, 'kl': 0.01015991810709238}, 'load_time_ms': 0.663, 'num_steps_sampled': 751200, 'update_time_ms': 2.44}",626,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",45.784000873565674,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,751200,751200,{},626,151,-55.83485696934436,2025-09-04_23-01-53,8.000416785293512,3651948,1757019713,-2.4108761205014515,24532.998854875565,35991,7.9801324503311255
+cda-server-2,False,24578.68521976471,"{'sample_time_ms': 44238.021, 'num_steps_trained': 752400, 'grad_time_ms': 363.44, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 102.60040283203125, 'policy_loss': -0.1330137997865677, 'vf_explained_var': 0.1471284031867981, 'entropy': 6.186196327209473, 'cur_lr': 4.999999873689376e-05, 'total_loss': 102.50945281982422, 'kl': 0.012312407605350018}, 'load_time_ms': 0.668, 'num_steps_sampled': 752400, 'update_time_ms': 2.453}",627,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",45.6863648891449,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,752400,752400,{},627,167,-54.97736939145061,2025-09-04_23-02-38,8.001673668214465,3651948,1757019758,-0.9532556604035017,24578.68521976471,36158,7.11377245508982
+cda-server-2,False,24622.602730989456,"{'sample_time_ms': 44203.22, 'num_steps_trained': 753600, 'grad_time_ms': 363.045, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 78.02803802490234, 'policy_loss': -0.12999826669692993, 'vf_explained_var': 0.12903517484664917, 'entropy': 6.610939979553223, 'cur_lr': 4.999999873689376e-05, 'total_loss': 77.93390655517578, 'kl': 0.010497664101421833}, 'load_time_ms': 0.661, 'num_steps_sampled': 753600, 'update_time_ms': 2.464}",628,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.917511224746704,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,753600,753600,{},628,149,-45.96642426016081,2025-09-04_23-03-22,8.000423726653253,3651948,1757019802,-2.5990424894013713,24622.602730989456,36307,8.134228187919463
+cda-server-2,False,24666.221581220627,"{'sample_time_ms': 44084.546, 'num_steps_trained': 754800, 'grad_time_ms': 363.648, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 89.68988800048828, 'policy_loss': -0.12220560014247894, 'vf_explained_var': 0.1673087626695633, 'entropy': 6.403261184692383, 'cur_lr': 4.999999873689376e-05, 'total_loss': 89.6006088256836, 'kl': 0.009637761861085892}, 'load_time_ms': 0.674, 'num_steps_sampled': 754800, 'update_time_ms': 2.487}",629,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.618850231170654,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,754800,754800,{},629,144,-57.61103400776423,2025-09-04_23-04-06,8.000681241852664,3651948,1757019846,-3.0005569397759198,24666.221581220627,36451,8.354166666666666
+cda-server-2,False,24710.076257944107,"{'sample_time_ms': 44141.343, 'num_steps_trained': 756000, 'grad_time_ms': 366.074, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 97.13150024414062, 'policy_loss': -0.12444964796304703, 'vf_explained_var': 0.1308441162109375, 'entropy': 5.918638229370117, 'cur_lr': 4.999999873689376e-05, 'total_loss': 97.04147338867188, 'kl': 0.010075706988573074}, 'load_time_ms': 0.671, 'num_steps_sampled': 756000, 'update_time_ms': 2.495}",630,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.854676723480225,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,756000,756000,{},630,156,-49.30420801763324,2025-09-04_23-04-50,8.000077690402945,3651948,1757019890,-2.0673539638389835,24710.076257944107,36607,7.7243589743589745
+cda-server-2,False,24754.239033460617,"{'sample_time_ms': 44146.504, 'num_steps_trained': 757200, 'grad_time_ms': 368.023, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 63.48638153076172, 'policy_loss': -0.13635939359664917, 'vf_explained_var': 0.16754120588302612, 'entropy': 6.467654705047607, 'cur_lr': 4.999999873689376e-05, 'total_loss': 63.38639450073242, 'kl': 0.01064166147261858}, 'load_time_ms': 0.665, 'num_steps_sampled': 757200, 'update_time_ms': 2.508}",631,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",44.16277551651001,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,757200,757200,{},631,153,-41.49477237485326,2025-09-04_23-05-34,8.000474989383674,3651948,1757019934,-1.986536782886692,24754.239033460617,36760,7.784313725490196
+cda-server-2,False,24800.753933668137,"{'sample_time_ms': 44434.667, 'num_steps_trained': 758400, 'grad_time_ms': 369.802, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 135.25201416015625, 'policy_loss': -0.12403617799282074, 'vf_explained_var': 0.1265505999326706, 'entropy': 6.6458587646484375, 'cur_lr': 4.999999873689376e-05, 'total_loss': 135.15782165527344, 'kl': 0.008736205287277699}, 'load_time_ms': 0.665, 'num_steps_sampled': 758400, 'update_time_ms': 2.473}",632,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 
'use_gae': True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.51490020751953,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,758400,758400,{},632,147,-87.41027328389559,2025-09-04_23-06-20,8.00020151228481,3651948,1757019980,-2.720434909127688,24800.753933668137,36907,8.224489795918368
+cda-server-2,False,24844.62259864807,"{'sample_time_ms': 44395.801, 'num_steps_trained': 759600, 'grad_time_ms': 368.813, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 49.94691848754883, 'policy_loss': -0.13533106446266174, 'vf_explained_var': 0.12871311604976654, 'entropy': 6.056528568267822, 'cur_lr': 4.999999873689376e-05, 'total_loss': 49.860191345214844, 'kl': 0.01422378420829773}, 'load_time_ms': 0.671, 'num_steps_sampled': 759600, 'update_time_ms': 2.506}",633,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.86866497993469,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,759600,759600,{},633,171,-31.30959013821765,2025-09-04_23-07-04,8.002170197632399,3651948,1757020024,-0.8340321662332024,24844.62259864807,37078,7.046783625730995
+cda-server-2,False,24888.49089694023,"{'sample_time_ms': 44231.95, 'num_steps_trained': 760800, 'grad_time_ms': 372.16, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 57.65977096557617, 'policy_loss': -0.12249165028333664, 'vf_explained_var': 0.19498319923877716, 'entropy': 6.04954719543457, 'cur_lr': 4.999999873689376e-05, 'total_loss': 57.57786560058594, 'kl': 0.011876864358782768}, 'load_time_ms': 0.69, 'num_steps_sampled': 760800, 'update_time_ms': 2.512}",634,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.868298292160034,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,760800,760800,{},634,163,-35.48862864066596,2025-09-04_23-07-48,8.00050602784834,3651948,1757020068,-1.2299932753925056,24888.49089694023,37241,7.269938650306749
+cda-server-2,False,24933.387457370758,"{'sample_time_ms': 44235.127, 'num_steps_trained': 762000, 'grad_time_ms': 373.55, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 66.62899017333984, 'policy_loss': -0.12201017886400223, 'vf_explained_var': 0.11644628643989563, 'entropy': 6.242872714996338, 'cur_lr': 4.999999873689376e-05, 'total_loss': 66.56243896484375, 'kl': 0.01623382419347763}, 'load_time_ms': 0.694, 'num_steps_sampled': 762000, 'update_time_ms': 2.525}",635,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",44.89656043052673,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,762000,762000,{},635,168,-39.96698073579465,2025-09-04_23-08-33,8.002326108816517,3651948,1757020113,-0.9746926353012546,24933.387457370758,37409,7.208333333333333
+cda-server-2,False,24977.66113090515,"{'sample_time_ms': 44084.915, 'num_steps_trained': 763200, 'grad_time_ms': 372.696, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 56.20417022705078, 'policy_loss': -0.13505983352661133, 'vf_explained_var': 0.1695346236228943, 'entropy': 6.184922695159912, 'cur_lr': 4.999999873689376e-05, 'total_loss': 56.1068229675293, 'kl': 0.011036181822419167}, 'load_time_ms': 0.696, 'num_steps_sampled': 763200, 'update_time_ms': 2.523}",636,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",44.27367353439331,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,763200,763200,{},636,165,-34.25751985972118,2025-09-04_23-09-17,8.000235318453338,3651948,1757020157,-1.191852890732927,24977.66113090515,37574,7.2727272727272725
+cda-server-2,False,25021.974050998688,"{'sample_time_ms': 43944.906, 'num_steps_trained': 764400, 'grad_time_ms': 375.315, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 58.89943313598633, 'policy_loss': -0.12993454933166504, 'vf_explained_var': 0.1601342409849167, 'entropy': 6.27501916885376, 'cur_lr': 4.999999873689376e-05, 'total_loss': 58.80284881591797, 'kl': 0.009758922271430492}, 'load_time_ms': 0.724, 'num_steps_sampled': 764400, 'update_time_ms': 2.536}",637,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",44.31292009353638,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,764400,764400,{},637,161,-46.09734988824823,2025-09-04_23-10-02,8.000186043016656,3651948,1757020202,-1.416363866425865,25021.974050998688,37735,7.422360248447205
+cda-server-2,False,25068.162934303284,"{'sample_time_ms': 44173.718, 'num_steps_trained': 765600, 'grad_time_ms': 373.69, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 72.19373321533203, 'policy_loss': -0.1261482536792755, 'vf_explained_var': 0.15021146833896637, 'entropy': 5.87333869934082, 'cur_lr': 4.999999873689376e-05, 'total_loss': 72.1015625, 'kl': 0.009941894561052322}, 'load_time_ms': 0.726, 'num_steps_sampled': 765600, 'update_time_ms': 2.519}",638,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.18888330459595,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,765600,765600,{},638,169,-50.312506479116315,2025-09-04_23-10-48,8.000159502364632,3651948,1757020248,-0.95737282468245,25068.162934303284,37904,7.136094674556213
+cda-server-2,False,25111.163761615753,"{'sample_time_ms': 44114.069, 'num_steps_trained': 766800, 'grad_time_ms': 371.575, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 94.76095581054688, 'policy_loss': -0.1292141079902649, 'vf_explained_var': 0.13159912824630737, 'entropy': 6.698611736297607, 'cur_lr': 4.999999873689376e-05, 'total_loss': 94.67372131347656, 'kl': 0.012286549434065819}, 'load_time_ms': 0.712, 'num_steps_sampled': 766800, 'update_time_ms': 2.491}",639,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.00082731246948,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,766800,766800,{},639,131,-55.47845012881969,2025-09-04_23-11-31,8.000006753771736,3651948,1757020291,-3.9606470500245923,25111.163761615753,38035,8.908396946564885
+cda-server-2,False,25155.178235292435,"{'sample_time_ms': 44132.72, 'num_steps_trained': 768000, 'grad_time_ms': 369.019, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 83.28701782226562, 'policy_loss': -0.14332841336727142, 'vf_explained_var': 0.16322636604309082, 'entropy': 6.342925071716309, 'cur_lr': 4.999999873689376e-05, 'total_loss': 83.18607330322266, 'kl': 0.012402743101119995}, 'load_time_ms': 0.705, 'num_steps_sampled': 768000, 'update_time_ms': 2.496}",640,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",44.01447367668152,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,768000,768000,{},640,152,-49.45546957041463,2025-09-04_23-12-15,8.00152596236635,3651948,1757020335,-2.256861185451964,25155.178235292435,38187,7.947368421052632
+cda-server-2,False,25199.824682474136,"{'sample_time_ms': 44182.438, 'num_steps_trained': 769200, 'grad_time_ms': 367.626, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 85.19735717773438, 'policy_loss': -0.12319857627153397, 'vf_explained_var': 0.15076127648353577, 'entropy': 6.567890167236328, 'cur_lr': 4.999999873689376e-05, 'total_loss': 85.11085510253906, 'kl': 0.01073968131095171}, 'load_time_ms': 0.708, 'num_steps_sampled': 769200, 'update_time_ms': 2.512}",641,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",44.64644718170166,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,769200,769200,{},641,163,-59.73300312535099,2025-09-04_23-13-00,8.000303664208356,3651948,1757020380,-1.2574134934606342,25199.824682474136,38350,7.374233128834356
+cda-server-2,False,25244.16807460785,"{'sample_time_ms': 43967.364, 'num_steps_trained': 770400, 'grad_time_ms': 365.571, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 44.156490325927734, 'policy_loss': -0.12580448389053345, 'vf_explained_var': 0.22070711851119995, 'entropy': 5.61702299118042, 'cur_lr': 4.999999873689376e-05, 'total_loss': 44.07794952392578, 'kl': 0.013830197975039482}, 'load_time_ms': 0.712, 'num_steps_sampled': 770400, 'update_time_ms': 2.533}",642,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",44.34339213371277,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,770400,770400,{},642,180,-31.859189695407828,2025-09-04_23-13-44,8.00001502181127,3651948,1757020424,-0.2685069614762518,25244.16807460785,38530,6.655555555555556
+cda-server-2,False,25289.16328573227,"{'sample_time_ms': 44078.562, 'num_steps_trained': 771600, 'grad_time_ms': 366.989, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 52.407814025878906, 'policy_loss': -0.13617174327373505, 'vf_explained_var': 0.17225253582000732, 'entropy': 6.01146125793457, 'cur_lr': 4.999999873689376e-05, 'total_loss': 52.309391021728516, 'kl': 0.011046170257031918}, 'load_time_ms': 0.705, 'num_steps_sampled': 771600, 'update_time_ms': 2.489}",643,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",44.995211124420166,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,771600,771600,{},643,180,-45.39402818782199,2025-09-04_23-14-29,8.000541824214974,3651948,1757020469,-0.2954727960442065,25289.16328573227,38710,6.716666666666667
+cda-server-2,False,25334.301076173782,"{'sample_time_ms': 44208.303, 'num_steps_trained': 772800, 'grad_time_ms': 364.212, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 74.90103149414062, 'policy_loss': -0.1326437145471573, 'vf_explained_var': 0.14354650676250458, 'entropy': 5.894720077514648, 'cur_lr': 4.999999873689376e-05, 'total_loss': 74.80452728271484, 'kl': 0.01057159248739481}, 'load_time_ms': 0.69, 'num_steps_sampled': 772800, 'update_time_ms': 2.524}",644,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",45.13779044151306,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,772800,772800,{},644,174,-51.61607029080748,2025-09-04_23-15-14,8.000028939890152,3651948,1757020514,-0.6184026541082966,25334.301076173782,38884,6.9655172413793105
+cda-server-2,False,25379.033576965332,"{'sample_time_ms': 44191.664, 'num_steps_trained': 774000, 'grad_time_ms': 364.451, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 58.47435760498047, 'policy_loss': -0.1249840259552002, 'vf_explained_var': 0.18261970579624176, 'entropy': 6.09347677230835, 'cur_lr': 4.999999873689376e-05, 'total_loss': 58.39113998413086, 'kl': 0.012223862111568451}, 'load_time_ms': 0.688, 'num_steps_sampled': 774000, 'update_time_ms': 2.505}",645,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",44.73250079154968,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,774000,774000,{},645,173,-36.35054553935688,2025-09-04_23-15-59,8.000086752426387,3651948,1757020559,-0.5923463946100265,25379.033576965332,39057,6.895953757225434
+cda-server-2,False,25423.75276517868,"{'sample_time_ms': 44235.68, 'num_steps_trained': 775200, 'grad_time_ms': 364.998, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 78.30078125, 'policy_loss': -0.13311919569969177, 'vf_explained_var': 0.1764691174030304, 'entropy': 6.262962341308594, 'cur_lr': 4.999999873689376e-05, 'total_loss': 78.20417022705078, 'kl': 0.010683656670153141}, 'load_time_ms': 0.693, 'num_steps_sampled': 775200, 'update_time_ms': 2.533}",646,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': True, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",44.71918821334839,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,775200,775200,{},646,160,-45.67624841012206,2025-09-04_23-16-44,8.000071928434444,3651948,1757020604,-1.5840930736773107,25423.75276517868,39217,7.475
+cda-server-2,False,25468.24201607704,"{'sample_time_ms': 44254.541, 'num_steps_trained': 776400, 'grad_time_ms': 363.852, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 74.37799835205078, 'policy_loss': -0.13237418234348297, 'vf_explained_var': 0.1862848401069641, 'entropy': 6.353860855102539, 'cur_lr': 4.999999873689376e-05, 'total_loss': 74.28138732910156, 'kl': 0.010467816144227982}, 'load_time_ms': 0.665, 'num_steps_sampled': 776400, 'update_time_ms': 2.478}",647,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",44.489250898361206,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,776400,776400,{},647,160,-51.045424512164175,2025-09-04_23-17-28,8.000171233385412,3651948,1757020648,-1.5128781970789107,25468.24201607704,39377,7.50625
+cda-server-2,False,25512.02780008316,"{'sample_time_ms': 44012.555, 'num_steps_trained': 777600, 'grad_time_ms': 365.473, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 81.03668975830078, 'policy_loss': -0.12070560455322266, 'vf_explained_var': 0.1592569500207901, 'entropy': 6.48253059387207, 'cur_lr': 4.999999873689376e-05, 'total_loss': 80.95088958740234, 'kl': 0.010214617475867271}, 'load_time_ms': 0.668, 'num_steps_sampled': 777600, 'update_time_ms': 2.501}",648,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",43.785784006118774,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,777600,777600,{},648,158,-56.738729062171075,2025-09-04_23-18-12,8.000098480557286,3651948,1757020692,-1.6814214758854766,25512.02780008316,39535,7.620253164556962
+cda-server-2,False,25557.458827733994,"{'sample_time_ms': 44253.806, 'num_steps_trained': 778800, 'grad_time_ms': 367.205, 'default': {'cur_kl_coeff': 3.417187452316284, 'vf_loss': 71.04450988769531, 'policy_loss': -0.13149532675743103, 'vf_explained_var': 0.15487469732761383, 'entropy': 6.051618576049805, 'cur_lr': 4.999999873689376e-05, 'total_loss': 70.95345306396484, 'kl': 0.011835633777081966}, 'load_time_ms': 0.681, 'num_steps_sampled': 778800, 'update_time_ms': 2.541}",649,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_rewards': None, 'clip_param': 0.3, 'num_envs_per_worker': 1, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'sample_async': False, 'optimizer': {}, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_step': None, 'on_episode_start': None, 'on_episode_end': None}, 'straggler_mitigation': False, 'lr': 5e-05, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'log_level': 'INFO', 'postprocess_inputs': False, 'use_gae': 
True, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",45.43102765083313,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,778800,778800,{},649,180,-38.79409213084881,2025-09-04_23-18-57,8.000220374057966,3651948,1757020737,-0.13380163012770663,25557.458827733994,39715,6.6722222222222225
+cda-server-2,False,49.171587228775024,"{'sample_time_ms': 48035.472, 'num_steps_trained': 769200, 'grad_time_ms': 658.142, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 80.39810943603516, 'cur_kl_coeff': 3.417187452316284, 'policy_loss': -0.1255156397819519, 'vf_explained_var': 0.1464996486902237, 'entropy': 6.043203353881836, 'total_loss': 80.30876922607422, 'kl': 0.01058445405215025}, 'load_time_ms': 32.209, 'num_steps_sampled': 769200, 'update_time_ms': 425.86}",641,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",49.171587228775024,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,1200,769200,{},1,165,-51.01398471661906,2025-09-04_23-20-27,8.00008693886987,3651947,1757020827,-1.0061105095131504,25204.34982252121,38352,7.16969696969697
+cda-server-2,False,92.87554669380188,"{'sample_time_ms': 45672.924, 'num_steps_trained': 770400, 'grad_time_ms': 521.629, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 59.50916290283203, 'cur_kl_coeff': 0.20000000298023224, 'policy_loss': -0.16246187686920166, 'vf_explained_var': 0.14330193400382996, 'entropy': 6.238642692565918, 'total_loss': 59.35683822631836, 'kl': 0.05069645121693611}, 'load_time_ms': 16.522, 'num_steps_sampled': 770400, 'update_time_ms': 214.116}",642,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",43.703959465026855,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,2400,770400,{},2,162,-29.439730138293463,2025-09-04_23-21-11,8.000001114768118,3651947,1757020871,-1.4558712941185261,25248.053781986237,38514,7.450617283950617
+cda-server-2,False,136.97035884857178,"{'sample_time_ms': 45020.759, 'num_steps_trained': 771600, 'grad_time_ms': 470.839, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 76.09286499023438, 'cur_kl_coeff': 0.30000001192092896, 'policy_loss': -0.14440931379795074, 'vf_explained_var': 0.175571471452713, 'entropy': 5.886499881744385, 'total_loss': 75.96224212646484, 'kl': 0.04594428837299347}, 'load_time_ms': 11.24, 'num_steps_sampled': 771600, 'update_time_ms': 143.618}",643,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",44.0948121547699,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,3600,771600,{},3,176,-44.25181005787245,2025-09-04_23-21-55,8.000157673943901,3651947,1757020915,-0.40405477821694546,25292.148594141006,38690,6.840909090909091
+cda-server-2,False,181.23059058189392,"{'sample_time_ms': 44737.901, 'num_steps_trained': 772800, 'grad_time_ms': 443.813, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 102.93730163574219, 'cur_kl_coeff': 0.44999995827674866, 'policy_loss': -0.1430501639842987, 'vf_explained_var': 0.13208433985710144, 'entropy': 6.184451580047607, 'total_loss': 102.80974578857422, 'kl': 0.03442486748099327}, 'load_time_ms': 8.596, 'num_steps_sampled': 772800, 'update_time_ms': 108.397}",644,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",44.260231733322144,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,4800,772800,{},4,161,-44.12001068039525,2025-09-04_23-22-40,8.000031808846654,3651947,1757020960,-0.975076751191758,25336.40882587433,38851,7.192546583850931
+cda-server-2,False,226.11338710784912,"{'sample_time_ms': 44690.604, 'num_steps_trained': 774000, 'grad_time_ms': 429.686, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 95.57435607910156, 'cur_kl_coeff': 0.675000011920929, 'policy_loss': -0.12285302579402924, 'vf_explained_var': 0.1429286152124405, 'entropy': 5.850916385650635, 'total_loss': 95.47222900390625, 'kl': 0.0307097639888525}, 'load_time_ms': 7.026, 'num_steps_sampled': 774000, 'update_time_ms': 87.196}",645,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",44.8827965259552,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,6000,774000,{},5,182,-78.25757381970666,2025-09-04_23-23-24,8.000250772352839,3651947,1757021004,-0.33489644070277547,25381.291622400284,39033,6.747252747252747
+cda-server-2,False,270.35734510421753,"{'sample_time_ms': 44550.027, 'num_steps_trained': 775200, 'grad_time_ms': 422.678, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 48.1241569519043, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.1484123319387436, 'vf_explained_var': 0.19235184788703918, 'entropy': 6.080326557159424, 'total_loss': 48.00688934326172, 'kl': 0.030759645625948906}, 'load_time_ms': 5.975, 'num_steps_sampled': 775200, 'update_time_ms': 73.07}",646,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",44.24395799636841,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,7200,775200,{},6,178,-37.51212203780169,2025-09-04_23-24-09,8.000157856662643,3651947,1757021049,-0.2909470011072178,25425.535580396652,39211,6.758426966292135
+cda-server-2,False,316.5271723270416,"{'sample_time_ms': 44726.874, 'num_steps_trained': 776400, 'grad_time_ms': 415.546, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 86.42475128173828, 'cur_kl_coeff': 1.5187499523162842, 'policy_loss': -0.12338193506002426, 'vf_explained_var': 0.1892755925655365, 'entropy': 5.520815372467041, 'total_loss': 86.34359741210938, 'kl': 0.027807703241705894}, 'load_time_ms': 5.216, 'num_steps_sampled': 776400, 'update_time_ms': 63.018}",647,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",46.1698272228241,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,8400,776400,{},7,176,-55.95493109002247,2025-09-04_23-24-55,8.000083218792803,3651947,1757021095,-0.5961270299818546,25471.705407619476,39387,6.892045454545454
+cda-server-2,False,361.74718618392944,"{'sample_time_ms': 44739.606, 'num_steps_trained': 777600, 'grad_time_ms': 411.273, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 113.408203125, 'cur_kl_coeff': 2.278125047683716, 'policy_loss': -0.12122918665409088, 'vf_explained_var': 0.17214380204677582, 'entropy': 5.716729640960693, 'total_loss': 113.32117462158203, 'kl': 0.015010855160653591}, 'load_time_ms': 4.651, 'num_steps_sampled': 777600, 'update_time_ms': 55.57}",648,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",45.22001385688782,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,9600,777600,{},8,183,-78.73519817331818,2025-09-04_23-25-40,8.001581479005628,3651947,1757021140,0.020421928998772477,25516.925421476364,39570,6.530054644808743
+cda-server-2,False,405.73359274864197,"{'sample_time_ms': 44612.565, 'num_steps_trained': 778800, 'grad_time_ms': 407.992, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 91.44491577148438, 'cur_kl_coeff': 2.278125047683716, 'policy_loss': -0.12602534890174866, 'vf_explained_var': 0.18112537264823914, 'entropy': 6.0855255126953125, 'total_loss': 91.3521728515625, 'kl': 0.014609340578317642}, 'load_time_ms': 4.204, 'num_steps_sampled': 778800, 'update_time_ms': 49.681}",649,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",43.986406564712524,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,10800,778800,{},9,159,-59.37940086547401,2025-09-04_23-26-24,8.000040145160105,3651947,1757021184,-1.4770342364397093,25560.911828041077,39729,7.433962264150943
+cda-server-2,False,450.1522297859192,"{'sample_time_ms': 44553.201, 'num_steps_trained': 780000, 'grad_time_ms': 406.306, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 56.77754592895508, 'cur_kl_coeff': 2.278125047683716, 'policy_loss': -0.13296259939670563, 'vf_explained_var': 0.21793025732040405, 'entropy': 5.715707778930664, 'total_loss': 56.674400329589844, 'kl': 0.013089141808450222}, 'load_time_ms': 3.867, 'num_steps_sampled': 780000, 'update_time_ms': 44.964}",650,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",44.41863703727722,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,12000,780000,{},10,183,-44.98278044619995,2025-09-04_23-27-09,8.000034881855434,3651947,1757021229,-0.1042382943825305,25605.330465078354,39912,6.666666666666667
+cda-server-2,False,494.83312129974365,"{'sample_time_ms': 44178.447, 'num_steps_trained': 781200, 'grad_time_ms': 378.741, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 63.68096160888672, 'cur_kl_coeff': 2.278125047683716, 'policy_loss': -0.13753175735473633, 'vf_explained_var': 0.18605300784111023, 'entropy': 5.709993362426758, 'total_loss': 63.57604217529297, 'kl': 0.014315648004412651}, 'load_time_ms': 0.729, 'num_steps_sampled': 781200, 'update_time_ms': 2.688}",651,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",44.68089151382446,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,13200,781200,{},11,181,-44.5715967312163,2025-09-04_23-27-53,8.000142848258454,3651947,1757021273,0.07179095830126418,25650.01135659218,40093,6.530386740331492
+cda-server-2,False,540.5377907752991,"{'sample_time_ms': 44379.498, 'num_steps_trained': 782400, 'grad_time_ms': 377.719, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 56.64405822753906, 'cur_kl_coeff': 2.278125047683716, 'policy_loss': -0.12208235263824463, 'vf_explained_var': 0.2421044558286667, 'entropy': 5.548465251922607, 'total_loss': 56.55010223388672, 'kl': 0.012348240241408348}, 'load_time_ms': 0.722, 'num_steps_sampled': 782400, 'update_time_ms': 2.726}",652,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",45.70466947555542,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,14400,782400,{},12,192,-45.844932623928244,2025-09-04_23-28-39,8.000027808861791,3651947,1757021319,0.34657583016566823,25695.716026067734,40285,6.359375
+cda-server-2,False,585.3861379623413,"{'sample_time_ms': 44455.498, 'num_steps_trained': 783600, 'grad_time_ms': 377.128, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 43.1224365234375, 'cur_kl_coeff': 2.278125047683716, 'policy_loss': -0.11802230775356293, 'vf_explained_var': 0.212782621383667, 'entropy': 5.450656890869141, 'total_loss': 43.03515625, 'kl': 0.013498026877641678}, 'load_time_ms': 0.729, 'num_steps_sampled': 783600, 'update_time_ms': 2.736}",653,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 'num_cpus_per_worker': 1, 
'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",44.848347187042236,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,15600,783600,{},13,198,-42.28307275126343,2025-09-04_23-29-24,8.000742460745652,3651947,1757021364,0.8650437457537659,25740.564373254776,40483,6.015151515151516
+cda-server-2,False,629.6491882801056,"{'sample_time_ms': 44455.938, 'num_steps_trained': 784800, 'grad_time_ms': 376.972, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 35.976253509521484, 'cur_kl_coeff': 2.278125047683716, 'policy_loss': -0.12478828430175781, 'vf_explained_var': 0.20753701031208038, 'entropy': 5.734801292419434, 'total_loss': 35.886383056640625, 'kl': 0.01532667689025402}, 'load_time_ms': 0.731, 'num_steps_sampled': 784800, 'update_time_ms': 2.69}",654,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",44.26305031776428,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,16800,784800,{},14,179,-26.925111122125088,2025-09-04_23-30-08,8.000073103811902,3651947,1757021408,-0.25536267353591746,25784.82742357254,40662,6.692737430167598
+cda-server-2,False,673.8481390476227,"{'sample_time_ms': 44388.919, 'num_steps_trained': 786000, 'grad_time_ms': 375.613, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 80.87190246582031, 'cur_kl_coeff': 2.278125047683716, 'policy_loss': -0.1281072199344635, 'vf_explained_var': 0.17109166085720062, 'entropy': 5.943303108215332, 'total_loss': 80.77613830566406, 'kl': 0.014199022203683853}, 'load_time_ms': 0.727, 'num_steps_sampled': 786000, 'update_time_ms': 2.684}",655,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",44.19895076751709,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,18000,786000,{},15,177,-65.37146581760679,2025-09-04_23-30-52,8.000347745166987,3651947,1757021452,-0.40892512614434534,25829.026374340057,40839,6.830508474576271
+cda-server-2,False,718.9396080970764,"{'sample_time_ms': 44475.401, 'num_steps_trained': 787200, 'grad_time_ms': 373.923, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 49.944305419921875, 'cur_kl_coeff': 2.278125047683716, 'policy_loss': -0.11831830441951752, 'vf_explained_var': 0.23415027558803558, 'entropy': 5.348814487457275, 'total_loss': 49.86027145385742, 'kl': 0.015046972781419754}, 'load_time_ms': 0.723, 'num_steps_sampled': 787200, 'update_time_ms': 2.671}",656,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",45.091469049453735,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,19200,787200,{},16,195,-47.67059051017336,2025-09-04_23-31-38,8.00011914570923,3651947,1757021498,0.5777705556420285,25874.11784338951,41034,6.17948717948718
+cda-server-2,False,763.9840202331543,"{'sample_time_ms': 44362.228, 'num_steps_trained': 788400, 'grad_time_ms': 374.544, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 55.97596740722656, 'cur_kl_coeff': 2.278125047683716, 'policy_loss': -0.12983694672584534, 'vf_explained_var': 0.18978098034858704, 'entropy': 5.546915054321289, 'total_loss': 55.8853645324707, 'kl': 0.017223402857780457}, 'load_time_ms': 0.718, 'num_steps_sampled': 788400, 'update_time_ms': 2.682}",657,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",45.04441213607788,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,20400,788400,{},17,185,-45.1517098015243,2025-09-04_23-32-23,8.000063786623432,3651947,1757021543,0.2701023951341103,25919.16225552559,41219,6.383783783783784
+cda-server-2,False,809.0964961051941,"{'sample_time_ms': 44353.29, 'num_steps_trained': 789600, 'grad_time_ms': 372.868, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 69.65789031982422, 'cur_kl_coeff': 2.278125047683716, 'policy_loss': -0.10145619511604309, 'vf_explained_var': 0.17257185280323029, 'entropy': 5.02720308303833, 'total_loss': 69.60121154785156, 'kl': 0.01965337060391903}, 'load_time_ms': 0.71, 'num_steps_sampled': 789600, 'update_time_ms': 2.606}",658,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 'num_cpus_per_worker': 
1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",45.112475872039795,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,21600,789600,{},18,201,-64.98972680038437,2025-09-04_23-33-08,8.000097602415536,3651947,1757021588,0.8802702053814631,25964.27473139763,41420,6.019900497512438
+cda-server-2,False,854.399516582489,"{'sample_time_ms': 44486.773, 'num_steps_trained': 790800, 'grad_time_ms': 371.047, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 54.54829025268555, 'cur_kl_coeff': 2.278125047683716, 'policy_loss': -0.14157640933990479, 'vf_explained_var': 0.2028069794178009, 'entropy': 5.475508213043213, 'total_loss': 54.43379592895508, 'kl': 0.011889781802892685}, 'load_time_ms': 0.712, 'num_steps_sampled': 790800, 'update_time_ms': 2.596}",659,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",45.30302047729492,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,22800,790800,{},19,196,-45.850798797415365,2025-09-04_23-33-53,8.001177423550006,3651947,1757021633,0.8742673896110602,26009.577751874924,41616,6.045918367346939
+cda-server-2,False,899.9549326896667,"{'sample_time_ms': 44602.517, 'num_steps_trained': 792000, 'grad_time_ms': 369.031, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 44.974159240722656, 'cur_kl_coeff': 2.278125047683716, 'policy_loss': -0.1250247359275818, 'vf_explained_var': 0.23093955218791962, 'entropy': 5.719305515289307, 'total_loss': 44.884029388427734, 'kl': 0.015319590456783772}, 'load_time_ms': 0.699, 'num_steps_sampled': 792000, 'update_time_ms': 2.59}",660,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",45.555416107177734,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,24000,792000,{},20,185,-35.04796864063427,2025-09-04_23-34-39,8.000493474008953,3651947,1757021679,0.037015927697965446,26055.1331679821,41801,6.589189189189189
+cda-server-2,False,945.4489457607269,"{'sample_time_ms': 44682.84, 'num_steps_trained': 793200, 'grad_time_ms': 370.143, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 36.74909591674805, 'cur_kl_coeff': 2.278125047683716, 'policy_loss': -0.12051972001791, 'vf_explained_var': 0.24064922332763672, 'entropy': 5.199189186096191, 'total_loss': 36.65876770019531, 'kl': 0.01325086411088705}, 'load_time_ms': 0.691, 'num_steps_sampled': 793200, 'update_time_ms': 2.522}",661,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 'num_cpus_per_worker': 
1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",45.49401307106018,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,25200,793200,{},21,209,-39.681957257770044,2025-09-04_23-35-24,8.000275863011161,3651947,1757021724,1.387292508648218,26100.62718105316,42010,5.712918660287081
+cda-server-2,False,990.379124879837,"{'sample_time_ms': 44606.615, 'num_steps_trained': 794400, 'grad_time_ms': 368.948, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 42.93064880371094, 'cur_kl_coeff': 2.278125047683716, 'policy_loss': -0.11987128853797913, 'vf_explained_var': 0.2201704978942871, 'entropy': 5.277359962463379, 'total_loss': 42.84127426147461, 'kl': 0.013386152684688568}, 'load_time_ms': 0.678, 'num_steps_sampled': 794400, 'update_time_ms': 2.515}",662,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",44.93017911911011,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,26400,794400,{},22,197,-43.35083907526241,2025-09-04_23-36-09,8.000037223687016,3651947,1757021769,0.6875811895780369,26145.55736017227,42207,6.121827411167513
+cda-server-2,False,1035.5377969741821,"{'sample_time_ms': 44636.979, 'num_steps_trained': 795600, 'grad_time_ms': 369.604, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 61.62825393676758, 'cur_kl_coeff': 2.278125047683716, 'policy_loss': -0.12032897025346756, 'vf_explained_var': 0.2165907770395279, 'entropy': 5.845945358276367, 'total_loss': 61.54192352294922, 'kl': 0.014922077767550945}, 'load_time_ms': 0.675, 'num_steps_sampled': 795600, 'update_time_ms': 2.502}",663,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",45.15867209434509,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,27600,795600,{},23,183,-58.21461659548312,2025-09-04_23-36-54,8.00040239126019,3651947,1757021814,0.13866083116593356,26190.716032266617,42390,6.5136612021857925
+cda-server-2,False,1081.0638763904572,"{'sample_time_ms': 44763.182, 'num_steps_trained': 796800, 'grad_time_ms': 369.648, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 33.9642333984375, 'cur_kl_coeff': 2.278125047683716, 'policy_loss': -0.12466225028038025, 'vf_explained_var': 0.25159433484077454, 'entropy': 5.293583393096924, 'total_loss': 33.87174606323242, 'kl': 0.01412378903478384}, 'load_time_ms': 0.673, 'num_steps_sampled': 796800, 'update_time_ms': 2.536}",664,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",45.526079416275024,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,28800,796800,{},24,195,-40.69437990214462,2025-09-04_23-37-40,8.000296837780727,3651947,1757021860,0.6098290167509591,26236.242111682892,42585,6.2
+cda-server-2,False,1125.7389600276947,"{'sample_time_ms': 44808.083, 'num_steps_trained': 798000, 'grad_time_ms': 372.214, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 52.32118606567383, 'cur_kl_coeff': 2.278125047683716, 'policy_loss': -0.1294259876012802, 'vf_explained_var': 0.24719592928886414, 'entropy': 5.405237674713135, 'total_loss': 52.22242736816406, 'kl': 0.013461814261972904}, 'load_time_ms': 0.67, 'num_steps_sampled': 798000, 'update_time_ms': 2.552}",665,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",44.67508363723755,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,30000,798000,{},25,195,-38.498745869441926,2025-09-04_23-38-25,8.000070550566782,3651947,1757021905,0.6551816421254213,26280.91719532013,42780,6.143589743589744
+cda-server-2,False,1171.0526955127716,"{'sample_time_ms': 44830.711, 'num_steps_trained': 799200, 'grad_time_ms': 371.812, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 38.177764892578125, 'cur_kl_coeff': 2.278125047683716, 'policy_loss': -0.11706037074327469, 'vf_explained_var': 0.23632574081420898, 'entropy': 4.860657215118408, 'total_loss': 38.09125900268555, 'kl': 0.013413351960480213}, 'load_time_ms': 0.679, 'num_steps_sampled': 799200, 'update_time_ms': 2.566}",666,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",45.313735485076904,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,31200,799200,{},26,215,-35.12925402894308,2025-09-04_23-39-10,8.000830990614764,3651947,1757021950,1.6261670936304542,26326.230930805206,42995,5.530232558139535
+cda-server-2,False,1215.927493572235,"{'sample_time_ms': 44815.717, 'num_steps_trained': 800400, 'grad_time_ms': 369.93, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 33.41366195678711, 'cur_kl_coeff': 2.278125047683716, 'policy_loss': -0.1383817344903946, 'vf_explained_var': 0.22734716534614563, 'entropy': 5.135607719421387, 'total_loss': 33.31162643432617, 'kl': 0.015954695641994476}, 'load_time_ms': 0.681, 'num_steps_sampled': 800400, 'update_time_ms': 2.528}",667,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",44.8747980594635,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,32400,800400,{},27,201,-27.462859210559884,2025-09-04_23-39-55,8.000123605264683,3651947,1757021995,0.9348612168659016,26371.10572886467,43196,5.9950248756218905
+cda-server-2,False,1261.8195703029633,"{'sample_time_ms': 44890.892, 'num_steps_trained': 801600, 'grad_time_ms': 372.65, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 35.195194244384766, 'cur_kl_coeff': 2.278125047683716, 'policy_loss': -0.1105131208896637, 'vf_explained_var': 0.27388784289360046, 'entropy': 4.7763848304748535, 'total_loss': 35.11569595336914, 'kl': 0.013614475727081299}, 'load_time_ms': 0.702, 'num_steps_sampled': 801600, 'update_time_ms': 2.515}",668,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",45.89207673072815,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,33600,801600,{},28,224,-33.62043392646845,2025-09-04_23-40-41,8.000203944907547,3651947,1757022041,1.8492366591700886,26416.997805595398,43420,5.40625
+cda-server-2,False,1307.6148715019226,"{'sample_time_ms': 44939.0, 'num_steps_trained': 802800, 'grad_time_ms': 373.657, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 34.7144775390625, 'cur_kl_coeff': 2.278125047683716, 'policy_loss': -0.11451967805624008, 'vf_explained_var': 0.2770408093929291, 'entropy': 4.928555011749268, 'total_loss': 34.6353874206543, 'kl': 0.015551049262285233}, 'load_time_ms': 0.708, 'num_steps_sampled': 802800, 'update_time_ms': 2.544}",669,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 'num_cpus_per_worker': 
1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",45.79530119895935,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,34800,802800,{},29,211,-30.87080522972294,2025-09-04_23-41-26,8.000065735985554,3651947,1757022086,1.3544957190944287,26462.793106794357,43631,5.682464454976303
+cda-server-2,False,1352.7094790935516,"{'sample_time_ms': 44891.559, 'num_steps_trained': 804000, 'grad_time_ms': 374.952, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 50.53966522216797, 'cur_kl_coeff': 2.278125047683716, 'policy_loss': -0.11584460735321045, 'vf_explained_var': 0.22542423009872437, 'entropy': 5.154943466186523, 'total_loss': 50.45745849609375, 'kl': 0.014764294028282166}, 'load_time_ms': 0.708, 'num_steps_sampled': 804000, 'update_time_ms': 2.575}",670,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",45.09460759162903,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,36000,804000,{},30,205,-39.661008442806754,2025-09-04_23-42-12,8.000056559355581,3651947,1757022132,1.1623952181782655,26507.887714385986,43836,5.824390243902439
+cda-server-2,False,1397.40673995018,"{'sample_time_ms': 44813.697, 'num_steps_trained': 805200, 'grad_time_ms': 373.182, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 54.982933044433594, 'cur_kl_coeff': 2.278125047683716, 'policy_loss': -0.11039525270462036, 'vf_explained_var': 0.25463879108428955, 'entropy': 4.86466121673584, 'total_loss': 54.90930938720703, 'kl': 0.016139768064022064}, 'load_time_ms': 0.709, 'num_steps_sampled': 805200, 'update_time_ms': 2.612}",671,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",44.69726085662842,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,37200,805200,{},31,203,-44.024063786882884,2025-09-04_23-42-56,8.00003487371069,3651947,1757022176,0.9380072839085655,26552.584975242615,44039,5.935960591133005
+cda-server-2,False,1443.2561042308807,"{'sample_time_ms': 44905.554, 'num_steps_trained': 806400, 'grad_time_ms': 373.207, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 25.35186004638672, 'cur_kl_coeff': 2.278125047683716, 'policy_loss': -0.12760642170906067, 'vf_explained_var': 0.2716485261917114, 'entropy': 4.429327487945557, 'total_loss': 25.25782012939453, 'kl': 0.014733772724866867}, 'load_time_ms': 0.713, 'num_steps_sampled': 806400, 'update_time_ms': 2.597}",672,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",45.849364280700684,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,38400,806400,{},32,222,-31.323198545069644,2025-09-04_23-43-42,8.000037960439514,3651947,1757022222,1.8188486432445576,26598.434339523315,44261,5.396396396396397
+cda-server-2,False,1489.0317306518555,"{'sample_time_ms': 44966.614, 'num_steps_trained': 807600, 'grad_time_ms': 373.81, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 25.199703216552734, 'cur_kl_coeff': 2.278125047683716, 'policy_loss': -0.11255758255720139, 'vf_explained_var': 0.30344411730766296, 'entropy': 4.501504898071289, 'total_loss': 25.113109588623047, 'kl': 0.011397127993404865}, 'load_time_ms': 0.717, 'num_steps_sampled': 807600, 'update_time_ms': 2.587}",673,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",45.77562642097473,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,39600,807600,{},33,226,-36.46241334684141,2025-09-04_23-44-28,8.0001096686023,3651947,1757022268,2.0325452432038214,26644.20996594429,44487,5.283185840707965
+cda-server-2,False,1533.8942770957947,"{'sample_time_ms': 44900.098, 'num_steps_trained': 808800, 'grad_time_ms': 373.993, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 32.96345138549805, 'cur_kl_coeff': 2.278125047683716, 'policy_loss': -0.11006785929203033, 'vf_explained_var': 0.3334667384624481, 'entropy': 5.090214729309082, 'total_loss': 32.88323974609375, 'kl': 0.013107547536492348}, 'load_time_ms': 0.716, 'num_steps_sampled': 808800, 'update_time_ms': 2.574}",674,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",44.86254644393921,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,40800,808800,{},34,202,-26.312977717310936,2025-09-04_23-45-13,8.000104340232854,3651947,1757022313,1.11009000835695,26689.07251238823,44689,5.871287128712871
+cda-server-2,False,1578.7702877521515,"{'sample_time_ms': 44919.257, 'num_steps_trained': 810000, 'grad_time_ms': 375.022, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 51.09336853027344, 'cur_kl_coeff': 2.278125047683716, 'policy_loss': -0.1132146418094635, 'vf_explained_var': 0.26288503408432007, 'entropy': 4.930473327636719, 'total_loss': 51.00904083251953, 'kl': 0.012678191065788269}, 'load_time_ms': 0.721, 'num_steps_sampled': 810000, 'update_time_ms': 2.552}",675,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",44.87601065635681,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,42000,810000,{},35,206,-51.0417323859044,2025-09-04_23-45-58,8.00026226483387,3651947,1757022358,1.0642628138436567,26733.948523044586,44895,5.922330097087379
+cda-server-2,False,1623.6381359100342,"{'sample_time_ms': 44872.54, 'num_steps_trained': 811200, 'grad_time_ms': 377.163, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 47.72408676147461, 'cur_kl_coeff': 2.278125047683716, 'policy_loss': -0.12084172666072845, 'vf_explained_var': 0.289846271276474, 'entropy': 4.756955623626709, 'total_loss': 47.63309097290039, 'kl': 0.013101667165756226}, 'load_time_ms': 0.714, 'num_steps_sampled': 811200, 'update_time_ms': 2.574}",676,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",44.86784815788269,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,43200,811200,{},36,205,-41.855391642439734,2025-09-04_23-46-43,8.00021452106471,3651947,1757022403,1.08842748446094,26778.81637120247,45100,5.8585365853658535
+cda-server-2,False,1669.1616306304932,"{'sample_time_ms': 44935.677, 'num_steps_trained': 812400, 'grad_time_ms': 378.799, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 46.421443939208984, 'cur_kl_coeff': 2.278125047683716, 'policy_loss': -0.1014418676495552, 'vf_explained_var': 0.26303741335868835, 'entropy': 4.09953498840332, 'total_loss': 46.349037170410156, 'kl': 0.012744201347231865}, 'load_time_ms': 0.719, 'num_steps_sampled': 812400, 'update_time_ms': 2.601}",677,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",45.523494720458984,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,44400,812400,{},37,221,-53.92958741396319,2025-09-04_23-47-28,8.000100413163354,3651947,1757022448,1.725258672383097,26824.339865922928,45321,5.4434389140271495
+cda-server-2,False,1714.7320952415466,"{'sample_time_ms': 44906.237, 'num_steps_trained': 813600, 'grad_time_ms': 376.05, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 25.21841812133789, 'cur_kl_coeff': 2.278125047683716, 'policy_loss': -0.11103672534227371, 'vf_explained_var': 0.3182068467140198, 'entropy': 4.209366321563721, 'total_loss': 25.141395568847656, 'kl': 0.01492943987250328}, 'load_time_ms': 0.705, 'num_steps_sampled': 813600, 'update_time_ms': 2.602}",678,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",45.57046461105347,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,45600,813600,{},38,236,-28.413637460438224,2025-09-04_23-48-14,8.000458522212895,3651947,1757022494,2.3552411870206993,26869.91033053398,45557,5.084745762711864
+cda-server-2,False,1762.8893086910248,"{'sample_time_ms': 45143.221, 'num_steps_trained': 814800, 'grad_time_ms': 375.308, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 26.07421112060547, 'cur_kl_coeff': 2.278125047683716, 'policy_loss': -0.11071144044399261, 'vf_explained_var': 0.3554925322532654, 'entropy': 3.9571533203125, 'total_loss': 25.987592697143555, 'kl': 0.010575653985142708}, 'load_time_ms': 0.699, 'num_steps_sampled': 814800, 'update_time_ms': 2.618}",679,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",48.15721344947815,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,46800,814800,{},39,244,-31.20026672780964,2025-09-04_23-49-02,10.0,3651947,1757022542,2.7279709710992877,26918.06754398346,45801,4.844262295081967
+cda-server-2,False,1808.5713317394257,"{'sample_time_ms': 45203.036, 'num_steps_trained': 816000, 'grad_time_ms': 374.193, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 39.99162292480469, 'cur_kl_coeff': 2.278125047683716, 'policy_loss': -0.11236973851919174, 'vf_explained_var': 0.2974632978439331, 'entropy': 4.766204357147217, 'total_loss': 39.909339904785156, 'kl': 0.01320748869329691}, 'load_time_ms': 0.699, 'num_steps_sampled': 816000, 'update_time_ms': 2.574}",680,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",45.68202304840088,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,48000,816000,{},40,224,-42.005266572671324,2025-09-04_23-49-48,8.000121456246195,3651947,1757022588,1.8229990918099992,26963.74956703186,46025,5.428571428571429
+cda-server-2,False,1854.3741126060486,"{'sample_time_ms': 45311.67, 'num_steps_trained': 817200, 'grad_time_ms': 375.919, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 20.1705322265625, 'cur_kl_coeff': 2.278125047683716, 'policy_loss': -0.10597968846559525, 'vf_explained_var': 0.37855952978134155, 'entropy': 4.278032302856445, 'total_loss': 20.095693588256836, 'kl': 0.013669062405824661}, 'load_time_ms': 0.698, 'num_steps_sampled': 817200, 'update_time_ms': 2.653}",681,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",45.802780866622925,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,49200,817200,{},41,230,-23.039651296811073,2025-09-04_23-50-34,8.000111549539966,3651947,1757022634,2.119041598304144,27009.552347898483,46255,5.2043478260869565
+cda-server-2,False,1900.3360126018524,"{'sample_time_ms': 45322.935, 'num_steps_trained': 818400, 'grad_time_ms': 375.914, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 33.26652526855469, 'cur_kl_coeff': 2.278125047683716, 'policy_loss': -0.0896776020526886, 'vf_explained_var': 0.2946290075778961, 'entropy': 3.887026786804199, 'total_loss': 33.2002067565918, 'kl': 0.010254154913127422}, 'load_time_ms': 0.695, 'num_steps_sampled': 818400, 'update_time_ms': 2.661}",682,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",45.96189999580383,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,50400,818400,{},42,247,-52.97668807427179,2025-09-04_23-51-20,8.000153198259685,3651947,1757022680,2.6885484381136426,27055.514247894287,46502,4.874493927125506
+cda-server-2,False,1946.71591258049,"{'sample_time_ms': 45384.9, 'num_steps_trained': 819600, 'grad_time_ms': 374.42, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 20.370283126831055, 'cur_kl_coeff': 2.278125047683716, 'policy_loss': -0.10092522203922272, 'vf_explained_var': 0.30661991238594055, 'entropy': 3.812130928039551, 'total_loss': 20.326799392700195, 'kl': 0.025214217603206635}, 'load_time_ms': 0.686, 'num_steps_sampled': 819600, 'update_time_ms': 2.657}",683,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",46.379899978637695,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,51600,819600,{},43,248,-25.692715770948595,2025-09-04_23-52-06,8.00004353166473,3651947,1757022726,2.7354757145783526,27101.894147872925,46750,4.826612903225806
+cda-server-2,False,1992.2334678173065,"{'sample_time_ms': 45450.43, 'num_steps_trained': 820800, 'grad_time_ms': 374.387, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 38.21272277832031, 'cur_kl_coeff': 3.417187452316284, 'policy_loss': -0.09470480680465698, 'vf_explained_var': 0.2592650055885315, 'entropy': 4.263044357299805, 'total_loss': 38.15930938720703, 'kl': 0.01208446267992258}, 'load_time_ms': 0.685, 'num_steps_sampled': 820800, 'update_time_ms': 2.658}",684,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",45.517555236816406,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,52800,820800,{},44,230,-45.83795802461822,2025-09-04_23-52-51,8.000986299434812,3651947,1757022771,2.1549766711363594,27147.41170310974,46980,5.2
+cda-server-2,False,2038.8074345588684,"{'sample_time_ms': 45623.539, 'num_steps_trained': 822000, 'grad_time_ms': 371.069, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 19.502605438232422, 'cur_kl_coeff': 3.417187452316284, 'policy_loss': -0.09870563447475433, 'vf_explained_var': 0.3662871718406677, 'entropy': 3.827545166015625, 'total_loss': 19.437639236450195, 'kl': 0.00987254548817873}, 'load_time_ms': 0.679, 'num_steps_sampled': 822000, 'update_time_ms': 2.675}",685,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",46.57396674156189,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,54000,822000,{},45,246,-27.886526359770727,2025-09-04_23-53-38,8.000052507444792,3651947,1757022818,2.6572154273389628,27193.985669851303,47226,4.8861788617886175
+cda-server-2,False,2084.5611431598663,"{'sample_time_ms': 45714.511, 'num_steps_trained': 823200, 'grad_time_ms': 368.644, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 26.020296096801758, 'cur_kl_coeff': 3.417187452316284, 'policy_loss': -0.0952225998044014, 'vf_explained_var': 0.36607956886291504, 'entropy': 4.083105087280273, 'total_loss': 25.966880798339844, 'kl': 0.012233107350766659}, 'load_time_ms': 0.675, 'num_steps_sampled': 823200, 'update_time_ms': 2.651}",686,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",45.753708600997925,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,55200,823200,{},46,233,-31.675725465750638,2025-09-04_23-54-24,8.000062316218074,3651947,1757022864,2.2040891490296857,27239.7393784523,47459,5.1373390557939915
+cda-server-2,False,2130.6540355682373,"{'sample_time_ms': 45771.937, 'num_steps_trained': 824400, 'grad_time_ms': 368.183, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 27.39217758178711, 'cur_kl_coeff': 3.417187452316284, 'policy_loss': -0.09471426904201508, 'vf_explained_var': 0.32949918508529663, 'entropy': 4.074531555175781, 'total_loss': 27.33019256591797, 'kl': 0.009578406810760498}, 'load_time_ms': 0.675, 'num_steps_sampled': 824400, 'update_time_ms': 2.624}",687,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",46.09289240837097,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,56400,824400,{},47,240,-28.73202633247127,2025-09-04_23-55-10,8.000046742088735,3651947,1757022910,2.5360759930993373,27285.832270860672,47699,4.975
+cda-server-2,False,2177.10223197937,"{'sample_time_ms': 45858.23, 'num_steps_trained': 825600, 'grad_time_ms': 369.739, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 24.265281677246094, 'cur_kl_coeff': 3.417187452316284, 'policy_loss': -0.08551517128944397, 'vf_explained_var': 0.3369382619857788, 'entropy': 3.921107769012451, 'total_loss': 24.211639404296875, 'kl': 0.009327537380158901}, 'load_time_ms': 0.676, 'num_steps_sampled': 825600, 'update_time_ms': 2.627}",688,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",46.44819641113281,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,57600,825600,{},48,234,-28.18351258581726,2025-09-04_23-55-56,8.000037329418017,3651947,1757022956,2.2101262666385373,27332.280467271805,47933,5.162393162393163
+cda-server-2,False,2222.396988391876,"{'sample_time_ms': 45572.549, 'num_steps_trained': 826800, 'grad_time_ms': 369.215, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 18.31333351135254, 'cur_kl_coeff': 3.417187452316284, 'policy_loss': -0.11753928661346436, 'vf_explained_var': 0.3489692211151123, 'entropy': 4.211871147155762, 'total_loss': 18.22791290283203, 'kl': 0.009398790076375008}, 'load_time_ms': 0.68, 'num_steps_sampled': 826800, 'update_time_ms': 2.567}",689,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",45.2947564125061,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,58800,826800,{},49,236,-21.46640907944748,2025-09-04_23-56-42,8.000062387216154,3651947,1757023002,2.3390708113989294,27377.57522368431,48169,5.101694915254237
+cda-server-2,False,2268.6880073547363,"{'sample_time_ms': 45632.634, 'num_steps_trained': 828000, 'grad_time_ms': 370.06, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 22.923683166503906, 'cur_kl_coeff': 3.417187452316284, 'policy_loss': -0.10533839464187622, 'vf_explained_var': 0.3062151074409485, 'entropy': 4.0076003074646, 'total_loss': 22.845335006713867, 'kl': 0.007898930460214615}, 'load_time_ms': 0.678, 'num_steps_sampled': 828000, 'update_time_ms': 2.59}",690,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",46.29101896286011,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,60000,828000,{},50,236,-35.438403936551865,2025-09-04_23-57-28,8.000014463707025,3651947,1757023048,2.2960326855448656,27423.86624264717,48405,5.080508474576271
+cda-server-2,False,2314.1386063098907,"{'sample_time_ms': 45598.0, 'num_steps_trained': 829200, 'grad_time_ms': 369.685, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 39.06608581542969, 'cur_kl_coeff': 3.417187452316284, 'policy_loss': -0.10068418085575104, 'vf_explained_var': 0.30122214555740356, 'entropy': 4.316643238067627, 'total_loss': 38.994544982910156, 'kl': 0.00852908380329609}, 'load_time_ms': 0.678, 'num_steps_sampled': 829200, 'update_time_ms': 2.463}",691,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",45.45059895515442,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,61200,829200,{},51,222,-44.2886555456217,2025-09-04_23-58-14,8.0000136264972,3651947,1757023094,1.8110764046072114,27469.316841602325,48627,5.400900900900901
+cda-server-2,False,2360.267109632492,"{'sample_time_ms': 45615.099, 'num_steps_trained': 830400, 'grad_time_ms': 369.287, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 28.4263858795166, 'cur_kl_coeff': 3.417187452316284, 'policy_loss': -0.0924384593963623, 'vf_explained_var': 0.311334490776062, 'entropy': 4.158641815185547, 'total_loss': 28.411972045898438, 'kl': 0.022832728922367096}, 'load_time_ms': 0.681, 'num_steps_sampled': 830400, 'update_time_ms': 2.45}",692,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 'num_cpus_per_worker': 
1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",46.12850332260132,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,62400,830400,{},52,231,-28.544174064684533,2025-09-04_23-59-00,8.000075723233019,3651947,1757023140,2.1830580373795043,27515.445344924927,48858,5.207792207792208
+cda-server-2,False,2406.292500257492,"{'sample_time_ms': 45579.713, 'num_steps_trained': 831600, 'grad_time_ms': 369.222, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 18.492910385131836, 'cur_kl_coeff': 5.125781059265137, 'policy_loss': -0.0924345999956131, 'vf_explained_var': 0.3897292912006378, 'entropy': 3.4774138927459717, 'total_loss': 18.42829132080078, 'kl': 0.005427065305411816}, 'load_time_ms': 0.682, 'num_steps_sampled': 831600, 'update_time_ms': 2.447}",693,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",46.025390625,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,63600,831600,{},53,255,-33.99333575272079,2025-09-04_23-59-46,8.000483205321288,3651947,1757023186,2.9133893310222883,27561.470735549927,49113,4.705882352941177
+cda-server-2,False,2453.794445037842,"{'sample_time_ms': 45776.969, 'num_steps_trained': 832800, 'grad_time_ms': 370.372, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 19.945158004760742, 'cur_kl_coeff': 5.125781059265137, 'policy_loss': -0.08766285330057144, 'vf_explained_var': 0.3745752274990082, 'entropy': 3.586599111557007, 'total_loss': 19.889028549194336, 'kl': 0.006151752080768347}, 'load_time_ms': 0.69, 'num_steps_sampled': 832800, 'update_time_ms': 2.423}",694,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",47.50194478034973,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,64800,832800,{},54,250,-35.834870898374945,2025-09-05_00-00-33,8.000053038291991,3651947,1757023233,2.8140092911234733,27608.972680330276,49363,4.796
+cda-server-2,False,2500.2112040519714,"{'sample_time_ms': 45760.551, 'num_steps_trained': 834000, 'grad_time_ms': 371.106, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 21.671066284179688, 'cur_kl_coeff': 5.125781059265137, 'policy_loss': -0.08804267644882202, 'vf_explained_var': 0.36468225717544556, 'entropy': 3.9830448627471924, 'total_loss': 21.612377166748047, 'kl': 0.005727276671677828}, 'load_time_ms': 0.691, 'num_steps_sampled': 834000, 'update_time_ms': 2.409}",695,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",46.41675901412964,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,66000,834000,{},55,242,-28.959408747237276,2025-09-05_00-01-20,8.000110532415139,3651947,1757023280,2.6203789680064515,27655.389439344406,49605,4.917355371900826
+cda-server-2,False,2548.130709171295,"{'sample_time_ms': 45975.987, 'num_steps_trained': 835200, 'grad_time_ms': 372.303, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 23.02083396911621, 'cur_kl_coeff': 5.125781059265137, 'policy_loss': -0.08154700696468353, 'vf_explained_var': 0.3930407762527466, 'entropy': 3.54258394241333, 'total_loss': 22.966352462768555, 'kl': 0.005280703771859407}, 'load_time_ms': 0.693, 'num_steps_sampled': 835200, 'update_time_ms': 2.399}",696,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",47.91950511932373,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,67200,835200,{},56,250,-36.17692362531083,2025-09-05_00-02-08,8.000071122194017,3651947,1757023328,2.790335409871374,27703.30894446373,49855,4.804
+cda-server-2,False,2594.7809772491455,"{'sample_time_ms': 46031.904, 'num_steps_trained': 836400, 'grad_time_ms': 372.116, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 24.553421020507812, 'cur_kl_coeff': 5.125781059265137, 'policy_loss': -0.09851668775081635, 'vf_explained_var': 0.3637484610080719, 'entropy': 3.9232969284057617, 'total_loss': 24.485055923461914, 'kl': 0.005882933735847473}, 'load_time_ms': 0.686, 'num_steps_sampled': 836400, 'update_time_ms': 2.394}",697,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",46.65026807785034,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,68400,836400,{},57,244,-29.634267765182685,2025-09-05_00-02-54,8.000422985103917,3651947,1757023374,2.6535923777146553,27749.95921254158,50099,4.905737704918033
+cda-server-2,False,2641.803017616272,"{'sample_time_ms': 46089.789, 'num_steps_trained': 837600, 'grad_time_ms': 371.533, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 20.05363655090332, 'cur_kl_coeff': 5.125781059265137, 'policy_loss': -0.08109728991985321, 'vf_explained_var': 0.3776855766773224, 'entropy': 3.494898796081543, 'total_loss': 19.998289108276367, 'kl': 0.005023529753088951}, 'load_time_ms': 0.68, 'num_steps_sampled': 837600, 'update_time_ms': 2.406}",698,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",47.022040367126465,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,69600,837600,{},58,255,-31.342953328465626,2025-09-05_00-03-41,8.00002653177848,3651947,1757023421,2.8137301623159416,27796.981252908707,50354,4.772549019607843
+cda-server-2,False,2689.7521953582764,"{'sample_time_ms': 46353.701, 'num_steps_trained': 838800, 'grad_time_ms': 373.008, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 23.673885345458984, 'cur_kl_coeff': 5.125781059265137, 'policy_loss': -0.08906450867652893, 'vf_explained_var': 0.37318751215934753, 'entropy': 3.4568328857421875, 'total_loss': 23.611942291259766, 'kl': 0.00529090128839016}, 'load_time_ms': 0.682, 'num_steps_sampled': 838800, 'update_time_ms': 2.465}",699,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",47.949177742004395,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,70800,838800,{},59,246,-34.01843889200904,2025-09-05_00-04-29,8.00003337242132,3651947,1757023469,2.676038276358482,27844.93043065071,50600,4.853658536585366
+cda-server-2,False,2735.9882276058197,"{'sample_time_ms': 46350.661, 'num_steps_trained': 840000, 'grad_time_ms': 370.582, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 18.364973068237305, 'cur_kl_coeff': 5.125781059265137, 'policy_loss': -0.0899556428194046, 'vf_explained_var': 0.33644577860832214, 'entropy': 3.8288726806640625, 'total_loss': 18.32174301147461, 'kl': 0.009115674532949924}, 'load_time_ms': 0.677, 'num_steps_sampled': 840000, 'update_time_ms': 2.46}",700,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",46.236032247543335,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,72000,840000,{},60,240,-26.072475345441987,2025-09-05_00-05-16,8.000126565405562,3651947,1757023516,2.4382448170064226,27891.166462898254,50840,5.008333333333334
+cda-server-2,False,2782.124693632126,"{'sample_time_ms': 46418.885, 'num_steps_trained': 841200, 'grad_time_ms': 370.797, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 18.015798568725586, 'cur_kl_coeff': 5.125781059265137, 'policy_loss': -0.0949694961309433, 'vf_explained_var': 0.38648679852485657, 'entropy': 3.605668783187866, 'total_loss': 17.95746612548828, 'kl': 0.007147365249693394}, 'load_time_ms': 0.669, 'num_steps_sampled': 841200, 'update_time_ms': 2.613}",701,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",46.13646602630615,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,73200,841200,{},61,247,-32.53581779156435,2025-09-05_00-06-02,8.000731287254961,3651947,1757023562,2.7484119177474615,27937.30292892456,51087,4.838056680161944
+cda-server-2,False,2828.820511817932,"{'sample_time_ms': 46474.504, 'num_steps_trained': 842400, 'grad_time_ms': 371.866, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 22.48615837097168, 'cur_kl_coeff': 5.125781059265137, 'policy_loss': -0.0782257467508316, 'vf_explained_var': 0.38535141944885254, 'entropy': 3.672297954559326, 'total_loss': 22.43346405029297, 'kl': 0.004980933386832476}, 'load_time_ms': 0.667, 'num_steps_sampled': 842400, 'update_time_ms': 2.645}",702,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",46.695818185806274,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,74400,842400,{},62,249,-30.512234717932195,2025-09-05_00-06-48,8.000339213512484,3651947,1757023608,2.7417556246248953,27983.998747110367,51336,4.815261044176707
+cda-server-2,False,2875.672497034073,"{'sample_time_ms': 46553.505, 'num_steps_trained': 843600, 'grad_time_ms': 375.505, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 10.897947311401367, 'cur_kl_coeff': 2.5628905296325684, 'policy_loss': -0.10184511542320251, 'vf_explained_var': 0.44461530447006226, 'entropy': 3.5678024291992188, 'total_loss': 10.819283485412598, 'kl': 0.009044832549989223}, 'load_time_ms': 0.67, 'num_steps_sampled': 843600, 'update_time_ms': 2.632}",703,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",46.85198521614075,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,75600,843600,{},63,253,-15.637942489142603,2025-09-05_00-07-35,8.000093791804058,3651947,1757023655,2.8552245182600067,28030.850732326508,51589,4.762845849802371
+cda-server-2,False,2922.5279846191406,"{'sample_time_ms': 46490.105, 'num_steps_trained': 844800, 'grad_time_ms': 374.243, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 21.747812271118164, 'cur_kl_coeff': 2.5628905296325684, 'policy_loss': -0.08856526017189026, 'vf_explained_var': 0.3574591875076294, 'entropy': 3.3065967559814453, 'total_loss': 21.676549911499023, 'kl': 0.00675173569470644}, 'load_time_ms': 0.663, 'num_steps_sampled': 844800, 'update_time_ms': 2.65}",704,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",46.85548758506775,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,76800,844800,{},64,256,-31.094574812658422,2025-09-05_00-08-22,8.000045120574933,3651947,1757023702,2.968602281348618,28077.706219911575,51845,4.6875
+cda-server-2,False,2968.942771911621,"{'sample_time_ms': 46487.444, 'num_steps_trained': 846000, 'grad_time_ms': 376.6, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 20.095355987548828, 'cur_kl_coeff': 2.5628905296325684, 'policy_loss': -0.09232830256223679, 'vf_explained_var': 0.41118118166923523, 'entropy': 3.3624517917633057, 'total_loss': 20.024757385253906, 'kl': 0.008477847091853619}, 'load_time_ms': 0.668, 'num_steps_sampled': 846000, 'update_time_ms': 2.681}",705,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",46.41478729248047,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,78000,846000,{},65,253,-30.661359582329993,2025-09-05_00-09-09,8.00091015858126,3651947,1757023749,2.875977254342749,28124.121007204056,52098,4.7272727272727275
+cda-server-2,False,3015.3129580020905,"{'sample_time_ms': 46332.031, 'num_steps_trained': 847200, 'grad_time_ms': 376.964, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 10.395467758178711, 'cur_kl_coeff': 2.5628905296325684, 'policy_loss': -0.08874928951263428, 'vf_explained_var': 0.48560789227485657, 'entropy': 2.913358211517334, 'total_loss': 10.328511238098145, 'kl': 0.008503603748977184}, 'load_time_ms': 0.663, 'num_steps_sampled': 847200, 'update_time_ms': 2.687}",706,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",46.37018609046936,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,79200,847200,{},66,269,-25.203466682672065,2025-09-05_00-09-55,8.000071265521461,3651947,1757023795,3.267096453091576,28170.491193294525,52367,4.4684014869888475
+cda-server-2,False,3062.5225052833557,"{'sample_time_ms': 46387.344, 'num_steps_trained': 848400, 'grad_time_ms': 377.588, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 10.933013916015625, 'cur_kl_coeff': 2.5628905296325684, 'policy_loss': -0.07779070734977722, 'vf_explained_var': 0.43059083819389343, 'entropy': 2.923530340194702, 'total_loss': 10.891273498535156, 'kl': 0.014066466130316257}, 'load_time_ms': 0.666, 'num_steps_sampled': 848400, 'update_time_ms': 2.712}",707,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",47.20954728126526,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,80400,848400,{},67,272,-14.993927343121719,2025-09-05_00-10-42,8.00002406751684,3651947,1757023842,3.4807626959049185,28217.70074057579,52639,4.360294117647059
+cda-server-2,False,3109.1843745708466,"{'sample_time_ms': 46350.702, 'num_steps_trained': 849600, 'grad_time_ms': 378.268, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 14.548519134521484, 'cur_kl_coeff': 2.5628905296325684, 'policy_loss': -0.08129000663757324, 'vf_explained_var': 0.40845823287963867, 'entropy': 3.379908561706543, 'total_loss': 14.496715545654297, 'kl': 0.011505262926220894}, 'load_time_ms': 0.674, 'num_steps_sampled': 849600, 'update_time_ms': 2.661}",708,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",46.661869287490845,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,81600,849600,{},68,263,-32.35425534450611,2025-09-05_00-11-29,8.000000506413096,3651947,1757023889,3.045566337120995,28264.36260986328,52902,4.634980988593156
+cda-server-2,False,3156.0025663375854,"{'sample_time_ms': 46236.685, 'num_steps_trained': 850800, 'grad_time_ms': 379.107, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 19.041440963745117, 'cur_kl_coeff': 2.5628905296325684, 'policy_loss': -0.07008924335241318, 'vf_explained_var': 0.4180201590061188, 'entropy': 3.1364493370056152, 'total_loss': 18.986976623535156, 'kl': 0.006095509976148605}, 'load_time_ms': 0.67, 'num_steps_sampled': 850800, 'update_time_ms': 2.651}",709,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",46.81819176673889,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,82800,850800,{},69,264,-38.008709475992845,2025-09-05_00-12-16,8.000051236475214,3651947,1757023936,3.1849787357643424,28311.18080163002,53166,4.526515151515151
+cda-server-2,False,3202.405710220337,"{'sample_time_ms': 46252.736, 'num_steps_trained': 852000, 'grad_time_ms': 379.714, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 13.956635475158691, 'cur_kl_coeff': 2.5628905296325684, 'policy_loss': -0.08226403594017029, 'vf_explained_var': 0.4574727416038513, 'entropy': 3.1847519874572754, 'total_loss': 13.90140151977539, 'kl': 0.010546525940299034}, 'load_time_ms': 0.67, 'num_steps_sampled': 852000, 'update_time_ms': 2.643}",710,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",46.403143882751465,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,84000,852000,{},70,262,-26.30637679999954,2025-09-05_00-13-02,8.000196248389745,3651947,1757023982,3.091944312839223,28357.58394551277,53428,4.587786259541985
+cda-server-2,False,3248.526032924652,"{'sample_time_ms': 46251.395, 'num_steps_trained': 853200, 'grad_time_ms': 379.431, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 8.423760414123535, 'cur_kl_coeff': 2.5628905296325684, 'policy_loss': -0.09352359175682068, 'vf_explained_var': 0.5000796318054199, 'entropy': 2.9549989700317383, 'total_loss': 8.35595989227295, 'kl': 0.010036887601017952}, 'load_time_ms': 0.667, 'num_steps_sampled': 853200, 'update_time_ms': 2.579}",711,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",46.120322704315186,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,85200,853200,{},71,268,-21.370431055449654,2025-09-05_00-13-48,8.000026311516713,3651947,1757024028,3.2506917069680656,28403.704268217087,53696,4.4888059701492535
+cda-server-2,False,3295.822465658188,"{'sample_time_ms': 46310.371, 'num_steps_trained': 854400, 'grad_time_ms': 380.46, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 9.886219024658203, 'cur_kl_coeff': 2.5628905296325684, 'policy_loss': -0.08912031352519989, 'vf_explained_var': 0.4919545352458954, 'entropy': 2.9815707206726074, 'total_loss': 9.818304061889648, 'kl': 0.008273966610431671}, 'load_time_ms': 0.672, 'num_steps_sampled': 854400, 'update_time_ms': 2.573}",712,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",47.29643273353577,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,86400,854400,{},72,271,-20.25757557351079,2025-09-05_00-14-36,8.000048798868667,3651947,1757024076,3.364667535127587,28451.000700950623,53967,4.424354243542435
+cda-server-2,False,3342.28800201416,"{'sample_time_ms': 46272.646, 'num_steps_trained': 855600, 'grad_time_ms': 379.513, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 11.11208438873291, 'cur_kl_coeff': 2.5628905296325684, 'policy_loss': -0.09259523451328278, 'vf_explained_var': 0.45920121669769287, 'entropy': 3.3032469749450684, 'total_loss': 11.049617767333984, 'kl': 0.011755743995308876}, 'load_time_ms': 0.664, 'num_steps_sampled': 855600, 'update_time_ms': 2.612}",713,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",46.46553635597229,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,87600,855600,{},73,260,-18.00896345769336,2025-09-05_00-15-22,8.000019869201463,3651947,1757024122,3.0988931512054565,28497.466237306595,54227,4.588461538461538
+cda-server-2,False,3389.0381722450256,"{'sample_time_ms': 46259.423, 'num_steps_trained': 856800, 'grad_time_ms': 382.153, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 15.24755859375, 'cur_kl_coeff': 2.5628905296325684, 'policy_loss': -0.08448615670204163, 'vf_explained_var': 0.4383828938007355, 'entropy': 3.083890199661255, 'total_loss': 15.185324668884277, 'kl': 0.008682480081915855}, 'load_time_ms': 0.661, 'num_steps_sampled': 856800, 'update_time_ms': 2.62}",714,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",46.75017023086548,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,88800,856800,{},74,262,-26.652847832941326,2025-09-05_00-16-09,8.000045001535248,3651947,1757024169,3.148076696258404,28544.21640753746,54489,4.561068702290076
+cda-server-2,False,3436.1214196681976,"{'sample_time_ms': 46327.623, 'num_steps_trained': 858000, 'grad_time_ms': 380.765, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 8.626786231994629, 'cur_kl_coeff': 2.5628905296325684, 'policy_loss': -0.08935275673866272, 'vf_explained_var': 0.4825197458267212, 'entropy': 2.945988416671753, 'total_loss': 8.564618110656738, 'kl': 0.010607601143419743}, 'load_time_ms': 0.648, 'num_steps_sampled': 858000, 'update_time_ms': 2.653}",715,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",47.083247423172,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,90000,858000,{},75,273,-20.310087306013106,2025-09-05_00-16-56,8.000071305921006,3651947,1757024216,3.364069705333792,28591.299654960632,54762,4.428571428571429
+cda-server-2,False,3482.877682209015,"{'sample_time_ms': 46367.026, 'num_steps_trained': 859200, 'grad_time_ms': 379.963, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 5.832857131958008, 'cur_kl_coeff': 2.5628905296325684, 'policy_loss': -0.08042430877685547, 'vf_explained_var': 0.5615320205688477, 'entropy': 3.0603370666503906, 'total_loss': 5.779318809509277, 'kl': 0.01049027033150196}, 'load_time_ms': 0.661, 'num_steps_sampled': 859200, 'update_time_ms': 2.723}",716,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",46.75626254081726,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,91200,859200,{},76,269,-17.04546011809048,2025-09-05_00-17-43,8.000278447989203,3651947,1757024263,3.3099764630801607,28638.05591750145,55031,4.4646840148698885
+cda-server-2,False,3529.5195393562317,"{'sample_time_ms': 46310.512, 'num_steps_trained': 860400, 'grad_time_ms': 379.64, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 15.390300750732422, 'cur_kl_coeff': 2.5628905296325684, 'policy_loss': -0.09058582782745361, 'vf_explained_var': 0.4222276210784912, 'entropy': 3.284696340560913, 'total_loss': 15.33092212677002, 'kl': 0.012176419608294964}, 'load_time_ms': 0.661, 'num_steps_sampled': 860400, 'update_time_ms': 2.732}",717,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",46.6418571472168,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,92400,860400,{},77,256,-29.825870573145558,2025-09-05_00-18-29,8.000019361627396,3651947,1757024309,2.9447175589626724,28684.697774648666,55287,4.6796875
+cda-server-2,False,3576.730393886566,"{'sample_time_ms': 46364.957, 'num_steps_trained': 861600, 'grad_time_ms': 380.013, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 11.509320259094238, 'cur_kl_coeff': 2.5628905296325684, 'policy_loss': -0.08916350454092026, 'vf_explained_var': 0.4413018822669983, 'entropy': 2.8452816009521484, 'total_loss': 11.440098762512207, 'kl': 0.007780800107866526}, 'load_time_ms': 0.659, 'num_steps_sampled': 861600, 'update_time_ms': 2.79}",718,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",47.21085453033447,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,93600,861600,{},78,272,-35.58424454992572,2025-09-05_00-19-17,8.000145093826061,3651947,1757024357,3.3670553265429093,28731.908629179,55559,4.415441176470588
+cda-server-2,False,3623.8617174625397,"{'sample_time_ms': 46396.216, 'num_steps_trained': 862800, 'grad_time_ms': 380.105, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 10.809052467346191, 'cur_kl_coeff': 2.5628905296325684, 'policy_loss': -0.06924965977668762, 'vf_explained_var': 0.4762766659259796, 'entropy': 2.7126989364624023, 'total_loss': 10.769659996032715, 'kl': 0.011649557389318943}, 'load_time_ms': 0.66, 'num_steps_sampled': 862800, 'update_time_ms': 2.76}",719,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",47.13132357597351,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,94800,862800,{},79,273,-22.089529522759932,2025-09-05_00-20-04,8.000017898641568,3651947,1757024404,3.42979719643761,28779.039952754974,55832,4.373626373626373
+cda-server-2,False,3671.192057609558,"{'sample_time_ms': 46488.954, 'num_steps_trained': 864000, 'grad_time_ms': 380.06, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 3.241161346435547, 'cur_kl_coeff': 2.5628905296325684, 'policy_loss': -0.0673958882689476, 'vf_explained_var': 0.6360262036323547, 'entropy': 2.518385887145996, 'total_loss': 3.212399482727051, 'kl': 0.015074353665113449}, 'load_time_ms': 0.669, 'num_steps_sampled': 864000, 'update_time_ms': 2.791}",720,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",47.33034014701843,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,96000,864000,{},80,278,-15.266491252988406,2025-09-05_00-20-51,8.000536107095432,3651947,1757024451,3.5300221895315302,28826.370292901993,56110,4.330935251798561
+cda-server-2,False,3717.9396743774414,"{'sample_time_ms': 46551.81, 'num_steps_trained': 865200, 'grad_time_ms': 380.034, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 11.02731990814209, 'cur_kl_coeff': 2.5628905296325684, 'policy_loss': -0.07797694206237793, 'vf_explained_var': 0.48665347695350647, 'entropy': 2.7421581745147705, 'total_loss': 10.972485542297363, 'kl': 0.009030384942889214}, 'load_time_ms': 0.67, 'num_steps_sampled': 865200, 'update_time_ms': 2.727}",721,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",46.7476167678833,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,97200,865200,{},81,277,-33.26573092002807,2025-09-05_00-21-38,8.001062685075288,3651947,1757024498,3.47922257909384,28873.117909669876,56387,4.346570397111913
+cda-server-2,False,3765.3334896564484,"{'sample_time_ms': 46562.748, 'num_steps_trained': 866400, 'grad_time_ms': 378.782, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 9.901885986328125, 'cur_kl_coeff': 2.5628905296325684, 'policy_loss': -0.08068516850471497, 'vf_explained_var': 0.5179368853569031, 'entropy': 2.73091983795166, 'total_loss': 9.847756385803223, 'kl': 0.01036145631223917}, 'load_time_ms': 0.669, 'num_steps_sampled': 866400, 'update_time_ms': 2.696}",722,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",47.39381527900696,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,98400,866400,{},82,270,-19.923465788829557,2025-09-05_00-22-25,8.000151182972646,3651947,1757024545,3.3268052655764717,28920.511724948883,56657,4.440740740740741
+cda-server-2,False,3811.302745103836,"{'sample_time_ms': 46512.378, 'num_steps_trained': 867600, 'grad_time_ms': 379.476, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 20.326995849609375, 'cur_kl_coeff': 2.5628905296325684, 'policy_loss': -0.08308325707912445, 'vf_explained_var': 0.348272442817688, 'entropy': 3.1172304153442383, 'total_loss': 20.29005241394043, 'kl': 0.01800324209034443}, 'load_time_ms': 0.67, 'num_steps_sampled': 867600, 'update_time_ms': 2.734}",723,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",45.969255447387695,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,99600,867600,{},83,256,-27.409793084418126,2025-09-05_00-23-11,8.000034678409563,3651947,1757024591,2.943829164403142,28966.48098039627,56913,4.6796875
+cda-server-2,False,3857.9213016033173,"{'sample_time_ms': 46498.858, 'num_steps_trained': 868800, 'grad_time_ms': 379.808, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 16.8582763671875, 'cur_kl_coeff': 2.5628905296325684, 'policy_loss': -0.06791261583566666, 'vf_explained_var': 0.3720639944076538, 'entropy': 2.4986538887023926, 'total_loss': 16.815534591674805, 'kl': 0.009820827282965183}, 'load_time_ms': 0.694, 'num_steps_sampled': 868800, 'update_time_ms': 2.75}",724,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",46.6185564994812,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,100800,868800,{},84,271,-29.84793747667375,2025-09-05_00-23-58,8.000172424884868,3651947,1757024638,3.321710883118062,29013.099536895752,57184,4.439114391143911
+cda-server-2,False,3905.05015873909,"{'sample_time_ms': 46505.631, 'num_steps_trained': 870000, 'grad_time_ms': 377.685, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 11.090794563293457, 'cur_kl_coeff': 2.5628905296325684, 'policy_loss': -0.07219991087913513, 'vf_explained_var': 0.5242128372192383, 'entropy': 2.560317039489746, 'total_loss': 11.037960052490234, 'kl': 0.007556334137916565}, 'load_time_ms': 0.691, 'num_steps_sampled': 870000, 'update_time_ms': 2.708}",725,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",47.128857135772705,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,102000,870000,{},85,281,-36.24064463232036,2025-09-05_00-24-45,8.000035228203407,3651947,1757024685,3.624096387251869,29060.228394031525,57465,4.252669039145908
+cda-server-2,False,3951.67906999588,"{'sample_time_ms': 46493.497, 'num_steps_trained': 871200, 'grad_time_ms': 377.16, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 18.921977996826172, 'cur_kl_coeff': 2.5628905296325684, 'policy_loss': -0.08155166357755661, 'vf_explained_var': 0.40494421124458313, 'entropy': 2.993685483932495, 'total_loss': 18.866363525390625, 'kl': 0.010120230726897717}, 'load_time_ms': 0.687, 'num_steps_sampled': 871200, 'update_time_ms': 2.654}",726,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",46.62891125679016,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,103200,871200,{},86,260,-30.3448061750018,2025-09-05_00-25-32,8.000038512943608,3651947,1757024732,3.026740964189752,29106.857305288315,57725,4.630769230769231
+cda-server-2,False,3998.3986616134644,"{'sample_time_ms': 46501.811, 'num_steps_trained': 872400, 'grad_time_ms': 376.626, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 6.25697660446167, 'cur_kl_coeff': 2.5628905296325684, 'policy_loss': -0.092364102602005, 'vf_explained_var': 0.5412502884864807, 'entropy': 2.757176399230957, 'total_loss': 6.1928324699401855, 'kl': 0.011011307127773762}, 'load_time_ms': 0.694, 'num_steps_sampled': 872400, 'update_time_ms': 2.614}",727,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",46.71959161758423,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,104400,872400,{},87,274,-17.31742520543737,2025-09-05_00-26-19,8.000000506305511,3651947,1757024779,3.427162098359765,29153.5768969059,57999,4.37956204379562
+cda-server-2,False,4045.3620221614838,"{'sample_time_ms': 46476.534, 'num_steps_trained': 873600, 'grad_time_ms': 377.121, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 7.723629474639893, 'cur_kl_coeff': 2.5628905296325684, 'policy_loss': -0.06784352660179138, 'vf_explained_var': 0.6001351475715637, 'entropy': 2.568483829498291, 'total_loss': 7.674904823303223, 'kl': 0.0074600898660719395}, 'load_time_ms': 0.697, 'num_steps_sampled': 873600, 'update_time_ms': 2.617}",728,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",46.96336054801941,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,105600,873600,{},88,280,-21.897753561519192,2025-09-05_00-27-06,8.000020597503449,3651947,1757024826,3.568644497477941,29200.54025745392,58279,4.2821428571428575
+cda-server-2,False,4092.2305996418,"{'sample_time_ms': 46449.759, 'num_steps_trained': 874800, 'grad_time_ms': 377.658, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 5.032718658447266, 'cur_kl_coeff': 2.5628905296325684, 'policy_loss': -0.06987176835536957, 'vf_explained_var': 0.6171102523803711, 'entropy': 2.566596031188965, 'total_loss': 4.980493545532227, 'kl': 0.006885468494147062}, 'load_time_ms': 0.691, 'num_steps_sampled': 874800, 'update_time_ms': 2.628}",729,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",46.86857748031616,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,106800,874800,{},89,283,-17.62443240684487,2025-09-05_00-27-52,8.000031365785473,3651947,1757024872,3.686039953968932,29247.408834934235,58562,4.229681978798586
+cda-server-2,False,4139.1349403858185,"{'sample_time_ms': 46406.891, 'num_steps_trained': 876000, 'grad_time_ms': 377.91, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 14.186030387878418, 'cur_kl_coeff': 2.5628905296325684, 'policy_loss': -0.07504018396139145, 'vf_explained_var': 0.3799859285354614, 'entropy': 2.8820865154266357, 'total_loss': 14.131270408630371, 'kl': 0.007913511246442795}, 'load_time_ms': 0.684, 'num_steps_sampled': 876000, 'update_time_ms': 2.619}",730,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",46.904340744018555,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,108000,876000,{},90,271,-34.642946426208134,2025-09-05_00-28-39,8.000049883580655,3651947,1757024919,3.320845003022244,29294.313175678253,58833,4.450184501845018
+cda-server-2,False,4185.574823856354,"{'sample_time_ms': 46377.522, 'num_steps_trained': 877200, 'grad_time_ms': 376.417, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 6.646142959594727, 'cur_kl_coeff': 2.5628905296325684, 'policy_loss': -0.06841918081045151, 'vf_explained_var': 0.5770161151885986, 'entropy': 2.8252432346343994, 'total_loss': 6.597856521606445, 'kl': 0.007855374366044998}, 'load_time_ms': 0.692, 'num_steps_sampled': 877200, 'update_time_ms': 2.727}",731,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",46.43988347053528,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,109200,877200,{},91,269,-15.727646521120409,2025-09-05_00-29-26,8.000000530303748,3651947,1757024966,3.3184268441065745,29340.75305914879,59102,4.4572490706319705
+cda-server-2,False,4232.155312299728,"{'sample_time_ms': 46295.642, 'num_steps_trained': 878400, 'grad_time_ms': 377.055, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 5.050663948059082, 'cur_kl_coeff': 2.5628905296325684, 'policy_loss': -0.07120595872402191, 'vf_explained_var': 0.5886048078536987, 'entropy': 2.470914125442505, 'total_loss': 4.999657154083252, 'kl': 0.007881563156843185}, 'load_time_ms': 0.698, 'num_steps_sampled': 878400, 'update_time_ms': 2.739}",732,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",46.580488443374634,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,110400,878400,{},92,283,-20.166795150347834,2025-09-05_00-30-12,8.000006800312939,3651947,1757025012,3.646886440726949,29387.333547592163,59385,4.243816254416961
+cda-server-2,False,4279.249795198441,"{'sample_time_ms': 46408.886, 'num_steps_trained': 879600, 'grad_time_ms': 376.343, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 5.111292362213135, 'cur_kl_coeff': 2.5628905296325684, 'policy_loss': -0.06189640238881111, 'vf_explained_var': 0.5947400331497192, 'entropy': 2.4335429668426514, 'total_loss': 5.073536396026611, 'kl': 0.009418894536793232}, 'load_time_ms': 0.703, 'num_steps_sampled': 879600, 'update_time_ms': 2.679}",733,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",47.09448289871216,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,111600,879600,{},93,285,-19.670616629682968,2025-09-05_00-31-00,8.000100171132532,3651947,1757025060,3.707545304214019,29434.428030490875,59670,4.203508771929824
+cda-server-2,False,4326.509482622147,"{'sample_time_ms': 46473.852, 'num_steps_trained': 880800, 'grad_time_ms': 375.486, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 6.908471584320068, 'cur_kl_coeff': 2.5628905296325684, 'policy_loss': -0.07086292654275894, 'vf_explained_var': 0.5395858287811279, 'entropy': 2.5147078037261963, 'total_loss': 6.8816447257995605, 'kl': 0.01718215085566044}, 'load_time_ms': 0.693, 'num_steps_sampled': 880800, 'update_time_ms': 2.675}",734,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'vf_loss_coeff': 1.0, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'env_config': {'generalize': False, 'run_valid': False}, 'custom_resources_per_worker': {}, 'output_max_file_size': 67108864, 'sample_async': False, 'vf_clip_param': 10.0, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_sample_end': None, 'on_episode_step': None, 'on_episode_end': None, 'on_train_result': None, 'on_episode_start': None}, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'num_cpus_for_driver': 1, 'output': None, 'lr_schedule': None, 'env': 'Zhenxin_S_FC', 
'num_cpus_per_worker': 1, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'conv_filters': None, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'max_seq_len': 20, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'train_batch_size': 1200, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0}",47.259687423706055,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,112800,880800,{},94,278,-21.357376095154702,2025-09-05_00-31-47,8.000043550016438,3651947,1757025107,3.5422173085621296,29481.68771791458,59948,4.302158273381295
+cda-server-2,False,51.37072706222534,"{'sample_time_ms': 50255.23, 'num_steps_sampled': 877200, 'grad_time_ms': 644.119, 'load_time_ms': 30.837, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 2.757443904876709, 'cur_kl_coeff': 2.5628905296325684, 'policy_loss': -0.06545218080282211, 'vf_explained_var': 0.7160266041755676, 'entropy': 2.517226219177246, 'total_loss': 2.709300994873047, 'kl': 0.006753734778612852}, 'num_steps_trained': 877200, 'update_time_ms': 419.61}",731,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",51.37072706222534,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,1200,877200,{},1,286,-15.63979681503266,2025-09-05_00-33-04,8.000428065231587,3651946,1757025184,3.766429130103205,29345.68390274048,59119,4.174825174825175
+cda-server-2,False,98.52884483337402,"{'sample_time_ms': 48508.027, 'num_steps_sampled': 878400, 'grad_time_ms': 515.882, 'load_time_ms': 15.738, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 2.82240891456604, 'cur_kl_coeff': 0.20000000298023224, 'policy_loss': -0.07866965234279633, 'vf_explained_var': 0.704016923904419, 'entropy': 2.650355339050293, 'total_loss': 2.7508230209350586, 'kl': 0.035418108105659485}, 'num_steps_trained': 878400, 'update_time_ms': 211.147}",732,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.15811777114868,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,2400,878400,{},2,281,-10.21646425354498,2025-09-05_00-33-51,8.000007095791503,3651946,1757025231,3.6234410643776283,29392.842020511627,59400,4.263345195729538
+cda-server-2,False,145.44398641586304,"{'sample_time_ms': 47850.038, 'num_steps_sampled': 879600, 'grad_time_ms': 467.731, 'load_time_ms': 10.748, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 5.903356075286865, 'cur_kl_coeff': 0.30000001192092896, 'policy_loss': -0.08864539116621017, 'vf_explained_var': 0.5746915340423584, 'entropy': 2.428527355194092, 'total_loss': 5.824267864227295, 'kl': 0.03185740113258362}, 'num_steps_trained': 879600, 'update_time_ms': 141.694}",733,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.915141582489014,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,3600,879600,{},3,277,-13.90984164579,2025-09-05_00-34-38,8.00001979376061,3651946,1757025278,3.4883242262017045,29439.757162094116,59677,4.324909747292419
+cda-server-2,False,192.16951775550842,"{'sample_time_ms': 47472.781, 'num_steps_sampled': 880800, 'grad_time_ms': 444.559, 'load_time_ms': 8.221, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 5.837953090667725, 'cur_kl_coeff': 0.44999995827674866, 'policy_loss': -0.08470302820205688, 'vf_explained_var': 0.604572057723999, 'entropy': 2.6007485389709473, 'total_loss': 5.766128063201904, 'kl': 0.028617437928915024}, 'num_steps_trained': 880800, 'update_time_ms': 106.839}",734,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.725531339645386,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,4800,880800,{},4,276,-19.68662802056178,2025-09-05_00-35-25,4.151264166506329,3651946,1757025325,3.425130972179667,29486.48269343376,59953,4.3731884057971016
+cda-server-2,False,239.78782200813293,"{'sample_time_ms': 47424.901, 'num_steps_sampled': 882000, 'grad_time_ms': 430.681, 'load_time_ms': 6.733, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 1.7892976999282837, 'cur_kl_coeff': 0.675000011920929, 'policy_loss': -0.07782699167728424, 'vf_explained_var': 0.7722499370574951, 'entropy': 2.1428143978118896, 'total_loss': 1.7271603345870972, 'kl': 0.02324373461306095}, 'num_steps_trained': 882000, 'update_time_ms': 86.049}",735,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.61830425262451,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,6000,882000,{},5,291,-8.219440340264054,2025-09-05_00-36-12,8.000131683144222,3651946,1757025372,3.8288531592261497,29534.100997686386,60244,4.120274914089347
+cda-server-2,False,287.04843401908875,"{'sample_time_ms': 47335.482, 'num_steps_sampled': 883200, 'grad_time_ms': 419.475, 'load_time_ms': 5.72, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 1.7316595315933228, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.06017966568470001, 'vf_explained_var': 0.7954539060592651, 'entropy': 2.148533821105957, 'total_loss': 1.6863609552383423, 'kl': 0.014697511680424213}, 'num_steps_trained': 883200, 'update_time_ms': 72.121}",736,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.26061201095581,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,7200,883200,{},6,291,-10.483582385177808,2025-09-05_00-37-00,8.000025212410838,3651946,1757025420,3.8384142471468294,29581.361609697342,60535,4.11340206185567
+cda-server-2,False,334.61439299583435,"{'sample_time_ms': 47314.895, 'num_steps_sampled': 884400, 'grad_time_ms': 411.76, 'load_time_ms': 5.004, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 1.9764469861984253, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.06290214508771896, 'vf_explained_var': 0.7574695944786072, 'entropy': 2.184715509414673, 'total_loss': 1.9268689155578613, 'kl': 0.013159679248929024}, 'num_steps_trained': 884400, 'update_time_ms': 62.175}",737,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.565958976745605,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,8400,884400,{},7,291,-8.023271051496412,2025-09-05_00-37-47,7.023995085919162,3651946,1757025467,3.8347218686210556,29628.927568674088,60826,4.120274914089347
+cda-server-2,False,381.60341811180115,"{'sample_time_ms': 47227.894, 'num_steps_sampled': 885600, 'grad_time_ms': 405.384, 'load_time_ms': 4.455, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 2.834179162979126, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.06365206092596054, 'vf_explained_var': 0.7102543711662292, 'entropy': 2.224116802215576, 'total_loss': 2.7815675735473633, 'kl': 0.010904477909207344}, 'num_steps_trained': 885600, 'update_time_ms': 54.712}",738,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.9890251159668,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,9600,885600,{},8,285,-15.972884287985934,2025-09-05_00-38-34,8.0000295037339,3651946,1757025514,3.6705258690296185,29675.916593790054,61111,4.224561403508772
+cda-server-2,False,428.8181712627411,"{'sample_time_ms': 47182.026, 'num_steps_sampled': 886800, 'grad_time_ms': 403.739, 'load_time_ms': 4.045, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 6.863865852355957, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.072520412504673, 'vf_explained_var': 0.5383694171905518, 'entropy': 2.2339959144592285, 'total_loss': 6.808557033538818, 'kl': 0.016999250277876854}, 'num_steps_trained': 886800, 'update_time_ms': 48.891}",739,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.21475315093994,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,10800,886800,{},9,281,-26.667691536904634,2025-09-05_00-39-21,8.000028136390261,3651946,1757025561,3.6138708879956787,29723.131346940994,61392,4.259786476868327
+cda-server-2,False,476.0368766784668,"{'sample_time_ms': 47145.202, 'num_steps_sampled': 888000, 'grad_time_ms': 402.95, 'load_time_ms': 3.715, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 30.431575775146484, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.07045107334852219, 'vf_explained_var': 0.41874897480010986, 'entropy': 2.376946449279785, 'total_loss': 30.371877670288086, 'kl': 0.01061793603003025}, 'num_steps_trained': 888000, 'update_time_ms': 44.245}",740,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.21870541572571,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,12000,888000,{},10,279,-59.05907563110662,2025-09-05_00-40-09,8.000030418811848,3651946,1757025609,3.5417181212822686,29770.35005235672,61671,4.304659498207886
+cda-server-2,False,523.2426319122314,"{'sample_time_ms': 46801.328, 'num_steps_sampled': 889200, 'grad_time_ms': 376.549, 'load_time_ms': 0.699, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 5.343428134918213, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.07987173646688461, 'vf_explained_var': 0.5977872014045715, 'entropy': 2.2722535133361816, 'total_loss': 5.2818779945373535, 'kl': 0.018095334991812706}, 'num_steps_trained': 889200, 'update_time_ms': 2.553}",741,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.20575523376465,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,13200,889200,{},11,284,-18.097665306019596,2025-09-05_00-40-56,8.000023350018065,3651946,1757025656,3.6538322136585846,29817.555807590485,61955,4.22887323943662
+cda-server-2,False,570.2855298519135,"{'sample_time_ms': 46791.193, 'num_steps_sampled': 890400, 'grad_time_ms': 375.237, 'load_time_ms': 0.703, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 4.663482666015625, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.0864449143409729, 'vf_explained_var': 0.6121327877044678, 'entropy': 2.26259708404541, 'total_loss': 4.596853256225586, 'kl': 0.01957099884748459}, 'num_steps_trained': 890400, 'update_time_ms': 2.537}",742,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.04289793968201,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,14400,890400,{},12,284,-14.581270665352228,2025-09-05_00-41-43,8.000023922826255,3651946,1757025703,3.63538998271352,29864.598705530167,62239,4.235915492957746
+cda-server-2,False,616.8720459938049,"{'sample_time_ms': 46758.824, 'num_steps_sampled': 891600, 'grad_time_ms': 374.797, 'load_time_ms': 0.693, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 3.075674057006836, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.07681259512901306, 'vf_explained_var': 0.6700502038002014, 'entropy': 2.2327585220336914, 'total_loss': 3.0228796005249023, 'kl': 0.023721568286418915}, 'num_steps_trained': 891600, 'update_time_ms': 2.507}",743,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.58651614189148,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,15600,891600,{},13,288,-11.860608985074286,2025-09-05_00-42-30,8.000049286677438,3651946,1757025750,3.7590176523967744,29911.185221672058,62527,4.15625
+cda-server-2,False,663.5976617336273,"{'sample_time_ms': 46757.835, 'num_steps_sampled': 892800, 'grad_time_ms': 375.773, 'load_time_ms': 0.7, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 3.446500062942505, 'cur_kl_coeff': 1.5187499523162842, 'policy_loss': -0.065461665391922, 'vf_explained_var': 0.6744142174720764, 'entropy': 2.196315288543701, 'total_loss': 3.398273229598999, 'kl': 0.011347964406013489}, 'num_steps_trained': 892800, 'update_time_ms': 2.508}",744,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 'model': 
{'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.72561573982239,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,16800,892800,{},14,285,-14.499308406275883,2025-09-05_00-43-16,8.000000550827266,3651946,1757025796,3.6625870164337764,29957.91083741188,62812,4.217543859649123
+cda-server-2,False,710.6882572174072,"{'sample_time_ms': 46706.204, 'num_steps_sampled': 894000, 'grad_time_ms': 374.632, 'load_time_ms': 0.692, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 2.7077999114990234, 'cur_kl_coeff': 1.5187499523162842, 'policy_loss': -0.06709850579500198, 'vf_explained_var': 0.7486275434494019, 'entropy': 2.110781669616699, 'total_loss': 2.655268669128418, 'kl': 0.009591775946319103}, 'num_steps_trained': 894000, 'update_time_ms': 2.506}",745,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.09059548377991,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,18000,894000,{},15,290,-17.46548281771164,2025-09-05_00-44-03,8.000287384166736,3651946,1757025843,3.8059023495548634,30005.00143289566,63102,4.13448275862069
+cda-server-2,False,757.5581822395325,"{'sample_time_ms': 46666.184, 'num_steps_sampled': 895200, 'grad_time_ms': 375.562, 'load_time_ms': 0.696, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 1.3441556692123413, 'cur_kl_coeff': 1.5187499523162842, 'policy_loss': -0.07237453758716583, 'vf_explained_var': 0.8276771903038025, 'entropy': 2.147783041000366, 'total_loss': 1.2861649990081787, 'kl': 0.009470919147133827}, 'num_steps_trained': 895200, 'update_time_ms': 2.505}",746,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.869925022125244,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,19200,895200,{},16,293,-10.233729541154727,2025-09-05_00-44-50,8.000077229562166,3651946,1757025890,3.8663022002656935,30051.871357917786,63395,4.092150170648464
+cda-server-2,False,805.0831966400146,"{'sample_time_ms': 46662.676, 'num_steps_sampled': 896400, 'grad_time_ms': 374.992, 'load_time_ms': 0.685, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 3.182039976119995, 'cur_kl_coeff': 1.5187499523162842, 'policy_loss': -0.0653173103928566, 'vf_explained_var': 0.7064453363418579, 'entropy': 2.1646270751953125, 'total_loss': 3.1286532878875732, 'kl': 0.007855619303882122}, 'num_steps_trained': 896400, 'update_time_ms': 2.484}",747,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.52501440048218,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,20400,896400,{},17,286,-10.768000822716967,2025-09-05_00-45-38,8.00002646899012,3651946,1757025938,3.7181498219123434,30099.396372318268,63681,4.188811188811189
+cda-server-2,False,851.8751463890076,"{'sample_time_ms': 46642.685, 'num_steps_sampled': 897600, 'grad_time_ms': 375.297, 'load_time_ms': 0.695, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 5.418216705322266, 'cur_kl_coeff': 1.5187499523162842, 'policy_loss': -0.06901151686906815, 'vf_explained_var': 0.6305850148200989, 'entropy': 2.42480731010437, 'total_loss': 5.365065574645996, 'kl': 0.010443081147968769}, 'num_steps_trained': 897600, 'update_time_ms': 2.498}",748,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.79194974899292,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,21600,897600,{},18,280,-21.04422672642532,2025-09-05_00-46-25,8.0000012869185,3651946,1757025985,3.5812138922683285,30146.18832206726,63961,4.292857142857143
+cda-server-2,False,899.5503449440002,"{'sample_time_ms': 46689.241, 'num_steps_sampled': 898800, 'grad_time_ms': 374.778, 'load_time_ms': 0.691, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.966882586479187, 'cur_kl_coeff': 1.5187499523162842, 'policy_loss': -0.06444099545478821, 'vf_explained_var': 0.8585163950920105, 'entropy': 1.9598612785339355, 'total_loss': 0.9176361560821533, 'kl': 0.010004710406064987}, 'num_steps_trained': 898800, 'update_time_ms': 2.503}",749,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.675198554992676,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,22800,898800,{},19,294,-7.156435126544029,2025-09-05_00-47-12,5.006220064224614,3651946,1757026032,3.8742650526256477,30193.863520622253,64255,4.085034013605442
+cda-server-2,False,946.906375169754,"{'sample_time_ms': 46703.824, 'num_steps_sampled': 900000, 'grad_time_ms': 373.825, 'load_time_ms': 0.694, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 1.9599549770355225, 'cur_kl_coeff': 1.5187499523162842, 'policy_loss': -0.05036471039056778, 'vf_explained_var': 0.769665002822876, 'entropy': 1.9341177940368652, 'total_loss': 1.9194469451904297, 'kl': 0.006490045692771673}, 'num_steps_trained': 900000, 'update_time_ms': 2.528}",750,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.356030225753784,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,24000,900000,{},20,291,-12.417183202285436,2025-09-05_00-48-00,8.000020497310967,3651946,1757026080,3.827909401140039,30241.219550848007,64546,4.109965635738831
+cda-server-2,False,993.5681178569794,"{'sample_time_ms': 46649.872, 'num_steps_sampled': 901200, 'grad_time_ms': 373.321, 'load_time_ms': 0.689, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 1.1924660205841064, 'cur_kl_coeff': 1.5187499523162842, 'policy_loss': -0.055155880749225616, 'vf_explained_var': 0.8331034183502197, 'entropy': 1.963564157485962, 'total_loss': 1.1522517204284668, 'kl': 0.009838176891207695}, 'num_steps_trained': 901200, 'update_time_ms': 2.526}",751,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.66174268722534,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,25200,901200,{},21,294,-8.014700964460296,2025-09-05_00-48-46,8.000081302487182,3651946,1757026126,3.856448872159633,30287.881293535233,64840,4.091836734693878
+cda-server-2,False,1040.7652094364166,"{'sample_time_ms': 46663.7, 'num_steps_sampled': 902400, 'grad_time_ms': 374.839, 'load_time_ms': 0.682, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 3.3636016845703125, 'cur_kl_coeff': 1.5187499523162842, 'policy_loss': -0.058115165680646896, 'vf_explained_var': 0.6844215989112854, 'entropy': 2.001267194747925, 'total_loss': 3.319319486618042, 'kl': 0.009108071215450764}, 'num_steps_trained': 902400, 'update_time_ms': 2.526}",752,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.197091579437256,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,26400,902400,{},22,288,-16.769227463652225,2025-09-05_00-49-34,8.000003331035625,3651946,1757026174,3.745986875609415,30335.07838511467,65128,4.166666666666667
+cda-server-2,False,1087.9503815174103,"{'sample_time_ms': 46723.952, 'num_steps_sampled': 903600, 'grad_time_ms': 374.386, 'load_time_ms': 0.689, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.5753068327903748, 'cur_kl_coeff': 1.5187499523162842, 'policy_loss': -0.0639975443482399, 'vf_explained_var': 0.9057748317718506, 'entropy': 1.8623218536376953, 'total_loss': 0.5323663949966431, 'kl': 0.013864720240235329}, 'num_steps_trained': 903600, 'update_time_ms': 2.568}",753,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.18517208099365,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,27600,903600,{},23,295,-6.585781991973349,2025-09-05_00-50-21,4.142384938761737,3651946,1757026221,3.8931401774311887,30382.263557195663,65423,4.071186440677966
+cda-server-2,False,1136.2893908023834,"{'sample_time_ms': 46885.196, 'num_steps_sampled': 904800, 'grad_time_ms': 374.532, 'load_time_ms': 0.686, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 1.7282322645187378, 'cur_kl_coeff': 1.5187499523162842, 'policy_loss': -0.04838193207979202, 'vf_explained_var': 0.7900778651237488, 'entropy': 2.0110979080200195, 'total_loss': 1.6923675537109375, 'kl': 0.008241821080446243}, 'num_steps_trained': 904800, 'update_time_ms': 2.564}",754,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",48.339009284973145,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,28800,904800,{},24,292,-11.56023833904667,2025-09-05_00-51-09,4.983661140020629,3651946,1757026269,3.83356178138226,30430.602566480637,65715,4.109589041095891
+cda-server-2,False,1183.1218509674072,"{'sample_time_ms': 46859.446, 'num_steps_sampled': 906000, 'grad_time_ms': 374.529, 'load_time_ms': 0.684, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 1.5535991191864014, 'cur_kl_coeff': 1.5187499523162842, 'policy_loss': -0.04831709340214729, 'vf_explained_var': 0.8219261765480042, 'entropy': 1.9546822309494019, 'total_loss': 1.51687753200531, 'kl': 0.0076349047012627125}, 'num_steps_trained': 906000, 'update_time_ms': 2.543}",755,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.832460165023804,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,30000,906000,{},25,294,-9.86583055741805,2025-09-05_00-51-56,4.106511992796452,3651946,1757026316,3.849252125650591,30477.43502664566,66009,4.095238095238095
+cda-server-2,False,1229.7373206615448,"{'sample_time_ms': 46835.34, 'num_steps_sampled': 907200, 'grad_time_ms': 373.125, 'load_time_ms': 0.684, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 5.297676086425781, 'cur_kl_coeff': 1.5187499523162842, 'policy_loss': -0.05537404119968414, 'vf_explained_var': 0.6569035053253174, 'entropy': 2.070859670639038, 'total_loss': 5.249495506286621, 'kl': 0.004736693575978279}, 'num_steps_trained': 907200, 'update_time_ms': 2.552}",756,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.61546969413757,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,31200,907200,{},26,289,-29.28841041210508,2025-09-05_00-52-43,4.002615996342401,3651946,1757026363,3.767955028791489,30524.050496339798,66298,4.145328719723183
+cda-server-2,False,1276.9345707893372,"{'sample_time_ms': 46799.988, 'num_steps_sampled': 908400, 'grad_time_ms': 375.649, 'load_time_ms': 0.699, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 3.9263293743133545, 'cur_kl_coeff': 0.7593749761581421, 'policy_loss': -0.058822259306907654, 'vf_explained_var': 0.6741502285003662, 'entropy': 2.136699914932251, 'total_loss': 3.889329195022583, 'kl': 0.028736749663949013}, 'num_steps_trained': 908400, 'update_time_ms': 2.553}",757,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.19725012779236,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,32400,908400,{},27,287,-20.18232179504566,2025-09-05_00-53-30,8.000017481990522,3651946,1757026410,3.736034700889114,30571.24774646759,66585,4.174216027874564
+cda-server-2,False,1323.8899717330933,"{'sample_time_ms': 46816.893, 'num_steps_sampled': 909600, 'grad_time_ms': 375.066, 'load_time_ms': 0.697, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 5.565729141235352, 'cur_kl_coeff': 1.139062523841858, 'policy_loss': -0.0506330206990242, 'vf_explained_var': 0.6052297353744507, 'entropy': 1.989142894744873, 'total_loss': 5.527322292327881, 'kl': 0.010733265429735184}, 'num_steps_trained': 909600, 'update_time_ms': 2.574}",758,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.9554009437561,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,33600,909600,{},28,290,-24.829486851198794,2025-09-05_00-54-17,8.000060744316851,3651946,1757026457,3.777224669021877,30618.203147411346,66875,4.144827586206897
+cda-server-2,False,1371.0296182632446,"{'sample_time_ms': 46763.367, 'num_steps_sampled': 910800, 'grad_time_ms': 374.995, 'load_time_ms': 0.696, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 2.501784563064575, 'cur_kl_coeff': 1.139062523841858, 'policy_loss': -0.059095755219459534, 'vf_explained_var': 0.7160967588424683, 'entropy': 1.9628499746322632, 'total_loss': 2.4916887283325195, 'kl': 0.04301762580871582}, 'num_steps_trained': 910800, 'update_time_ms': 2.607}",759,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.13964653015137,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,34800,910800,{},29,288,-11.780509092009758,2025-09-05_00-55-04,4.000989966947292,3651946,1757026504,3.755538409299826,30665.342793941498,67163,4.15625
+cda-server-2,False,1418.2535438537598,"{'sample_time_ms': 46752.966, 'num_steps_sampled': 912000, 'grad_time_ms': 372.306, 'load_time_ms': 0.684, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 2.2747597694396973, 'cur_kl_coeff': 1.708593726158142, 'policy_loss': -0.05887572094798088, 'vf_explained_var': 0.7684090733528137, 'entropy': 1.9990357160568237, 'total_loss': 2.2269842624664307, 'kl': 0.00649667764082551}, 'num_steps_trained': 912000, 'update_time_ms': 2.598}",760,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.22392559051514,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,36000,912000,{},30,291,-19.74093652901318,2025-09-05_00-55-51,5.502330570062896,3651946,1757026551,3.796380304913446,30712.566719532013,67454,4.1271477663230245
+cda-server-2,False,1465.2916657924652,"{'sample_time_ms': 46790.791, 'num_steps_sampled': 913200, 'grad_time_ms': 372.05, 'load_time_ms': 0.687, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 3.733677387237549, 'cur_kl_coeff': 1.708593726158142, 'policy_loss': -0.06684742867946625, 'vf_explained_var': 0.6711017489433289, 'entropy': 2.162313461303711, 'total_loss': 3.6854870319366455, 'kl': 0.010919542983174324}, 'num_steps_trained': 913200, 'update_time_ms': 2.634}",761,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.038121938705444,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,37200,913200,{},31,284,-18.80590203704147,2025-09-05_00-56-38,4.100088669613918,3651946,1757026598,3.666076323286445,30759.60484147072,67738,4.221830985915493
+cda-server-2,False,1513.3106949329376,"{'sample_time_ms': 46875.575, 'num_steps_sampled': 914400, 'grad_time_ms': 369.424, 'load_time_ms': 0.691, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 3.3427648544311523, 'cur_kl_coeff': 1.708593726158142, 'policy_loss': -0.05309293791651726, 'vf_explained_var': 0.7214845418930054, 'entropy': 1.996940016746521, 'total_loss': 3.305532693862915, 'kl': 0.009283188730478287}, 'num_steps_trained': 914400, 'update_time_ms': 2.665}",762,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",48.01902914047241,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,38400,914400,{},32,289,-19.02872107031565,2025-09-05_00-57-26,8.000000521115968,3651946,1757026646,3.7687700117198597,30807.62387061119,68027,4.14878892733564
+cda-server-2,False,1560.8343374729156,"{'sample_time_ms': 46907.807, 'num_steps_sampled': 915600, 'grad_time_ms': 371.119, 'load_time_ms': 0.679, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 2.529505729675293, 'cur_kl_coeff': 1.708593726158142, 'policy_loss': -0.06161157786846161, 'vf_explained_var': 0.7259671092033386, 'entropy': 2.2362306118011475, 'total_loss': 2.4820244312286377, 'kl': 0.008270077407360077}, 'num_steps_trained': 915600, 'update_time_ms': 2.627}",763,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.52364253997803,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,39600,915600,{},33,287,-13.032147027614243,2025-09-05_00-58-14,5.0118155810203895,3651946,1757026694,3.7187382927629975,30855.14751315117,68314,4.191637630662021
+cda-server-2,False,1607.6388757228851,"{'sample_time_ms': 46754.006, 'num_steps_sampled': 916800, 'grad_time_ms': 371.454, 'load_time_ms': 0.68, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 3.095893383026123, 'cur_kl_coeff': 1.708593726158142, 'policy_loss': -0.05758281424641609, 'vf_explained_var': 0.7039374709129333, 'entropy': 1.8250925540924072, 'total_loss': 3.058149814605713, 'kl': 0.011611266992986202}, 'num_steps_trained': 916800, 'update_time_ms': 2.668}",764,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.80453824996948,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,40800,916800,{},34,290,-15.405464045090536,2025-09-05_00-59-01,4.101943775072241,3651946,1757026741,3.760532188373482,30901.95205140114,68604,4.1482758620689655
+cda-server-2,False,1654.4262464046478,"{'sample_time_ms': 46749.439, 'num_steps_sampled': 918000, 'grad_time_ms': 371.511, 'load_time_ms': 0.681, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 5.60544490814209, 'cur_kl_coeff': 1.708593726158142, 'policy_loss': -0.045216772705316544, 'vf_explained_var': 0.6065601706504822, 'entropy': 2.1049537658691406, 'total_loss': 5.587712287902832, 'kl': 0.016085775569081306}, 'num_steps_trained': 918000, 'update_time_ms': 2.657}",765,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.787370681762695,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,42000,918000,{},35,284,-23.6895425258586,2025-09-05_00-59-48,4.001086082575656,3651946,1757026788,3.669152911224492,30948.7394220829,68888,4.207746478873239
+cda-server-2,False,1701.4659514427185,"{'sample_time_ms': 46791.69, 'num_steps_sampled': 919200, 'grad_time_ms': 371.68, 'load_time_ms': 0.676, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 1.3845746517181396, 'cur_kl_coeff': 1.708593726158142, 'policy_loss': -0.04864999279379845, 'vf_explained_var': 0.8147275447845459, 'entropy': 1.8801261186599731, 'total_loss': 1.3486356735229492, 'kl': 0.007439528126269579}, 'num_steps_trained': 919200, 'update_time_ms': 2.668}",766,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.03970503807068,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,43200,919200,{},36,294,-11.958277723890621,2025-09-05_01-00-35,8.000003116702734,3651946,1757026835,3.8941448277293755,30995.77912712097,69182,4.071428571428571
+cda-server-2,False,1748.6571514606476,"{'sample_time_ms': 46793.19, 'num_steps_sampled': 920400, 'grad_time_ms': 369.533, 'load_time_ms': 0.675, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 2.241079330444336, 'cur_kl_coeff': 1.708593726158142, 'policy_loss': -0.05559562146663666, 'vf_explained_var': 0.7553683519363403, 'entropy': 1.8627183437347412, 'total_loss': 2.200254440307617, 'kl': 0.00864488072693348}, 'num_steps_trained': 920400, 'update_time_ms': 2.712}",767,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.19120001792908,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,44400,920400,{},37,291,-15.270401846881054,2025-09-05_01-01-22,4.109935601198734,3651946,1757026882,3.8054175794698,31042.9703271389,69473,4.1271477663230245
+cda-server-2,False,1796.2981708049774,"{'sample_time_ms': 46859.035, 'num_steps_sampled': 921600, 'grad_time_ms': 372.262, 'load_time_ms': 0.681, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 3.658536672592163, 'cur_kl_coeff': 1.708593726158142, 'policy_loss': -0.052428171038627625, 'vf_explained_var': 0.7163556218147278, 'entropy': 1.8900055885314941, 'total_loss': 3.6253576278686523, 'kl': 0.011266032233834267}, 'num_steps_trained': 921600, 'update_time_ms': 2.679}",768,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.641019344329834,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,45600,921600,{},38,289,-21.77859812250777,2025-09-05_01-02-09,4.126091784186673,3651946,1757026929,3.7549427429645186,31090.61134648323,69762,4.159169550173011
+cda-server-2,False,1844.1948111057281,"{'sample_time_ms': 46934.512, 'num_steps_sampled': 922800, 'grad_time_ms': 372.491, 'load_time_ms': 0.685, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 3.9979138374328613, 'cur_kl_coeff': 1.708593726158142, 'policy_loss': -0.0605757050216198, 'vf_explained_var': 0.6288023591041565, 'entropy': 1.9570708274841309, 'total_loss': 3.953638792037964, 'kl': 0.009540567174553871}, 'num_steps_trained': 922800, 'update_time_ms': 2.665}",769,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.89664030075073,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,46800,922800,{},39,285,-20.098995017968985,2025-09-05_01-02-57,5.205426811179739,3651946,1757026977,3.66778813282919,31138.50798678398,70047,4.2105263157894735
+cda-server-2,False,1891.3065786361694,"{'sample_time_ms': 46922.462, 'num_steps_sampled': 924000, 'grad_time_ms': 373.286, 'load_time_ms': 0.686, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 2.0683321952819824, 'cur_kl_coeff': 1.708593726158142, 'policy_loss': -0.044321898370981216, 'vf_explained_var': 0.7987402081489563, 'entropy': 1.8195881843566895, 'total_loss': 2.0326194763183594, 'kl': 0.005038855131715536}, 'num_steps_trained': 924000, 'update_time_ms': 2.657}",770,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.111767530441284,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,48000,924000,{},40,295,-16.05550411167573,2025-09-05_01-03-45,4.592011325067769,3651946,1757027025,3.9061346613050865,31185.619754314423,70342,4.064406779661017
+cda-server-2,False,1938.3917880058289,"{'sample_time_ms': 46928.13, 'num_steps_sampled': 925200, 'grad_time_ms': 372.466, 'load_time_ms': 0.681, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 5.500292778015137, 'cur_kl_coeff': 1.708593726158142, 'policy_loss': -0.05776253715157509, 'vf_explained_var': 0.6448712944984436, 'entropy': 2.2128381729125977, 'total_loss': 5.457508563995361, 'kl': 0.008766286075115204}, 'num_steps_trained': 925200, 'update_time_ms': 2.594}",771,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.085209369659424,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,49200,925200,{},41,283,-26.04057689857185,2025-09-05_01-04-32,4.002401863734518,3651946,1757027072,3.636368039128188,31232.704963684082,70625,4.23321554770318
+cda-server-2,False,1984.8419890403748,"{'sample_time_ms': 46771.66, 'num_steps_sampled': 926400, 'grad_time_ms': 372.114, 'load_time_ms': 0.681, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 12.420703887939453, 'cur_kl_coeff': 1.708593726158142, 'policy_loss': -0.05916784703731537, 'vf_explained_var': 0.522458553314209, 'entropy': 2.136233329772949, 'total_loss': 12.379623413085938, 'kl': 0.010586130432784557}, 'num_steps_trained': 926400, 'update_time_ms': 2.567}",772,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.4502010345459,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,50400,926400,{},42,279,-39.33067773468262,2025-09-05_01-05-18,4.092580515738035,3651946,1757027118,3.515820113005836,31279.155164718628,70904,4.308243727598566
+cda-server-2,False,2031.839220046997,"{'sample_time_ms': 46718.226, 'num_steps_sampled': 927600, 'grad_time_ms': 372.883, 'load_time_ms': 0.692, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 6.606550216674805, 'cur_kl_coeff': 1.708593726158142, 'policy_loss': -0.06816405057907104, 'vf_explained_var': 0.5809977054595947, 'entropy': 1.9930299520492554, 'total_loss': 6.560443878173828, 'kl': 0.012910023331642151}, 'num_steps_trained': 927600, 'update_time_ms': 2.581}",773,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.997231006622314,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,51600,927600,{},43,283,-21.88796172680281,2025-09-05_01-06-05,4.002439810687289,3651946,1757027165,3.612397638943765,31326.15239572525,71187,4.247349823321555
+cda-server-2,False,2078.8071944713593,"{'sample_time_ms': 46735.43, 'num_steps_sampled': 928800, 'grad_time_ms': 371.922, 'load_time_ms': 0.689, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 2.415851354598999, 'cur_kl_coeff': 1.708593726158142, 'policy_loss': -0.07837872952222824, 'vf_explained_var': 0.7386643886566162, 'entropy': 1.9028277397155762, 'total_loss': 2.3538591861724854, 'kl': 0.00959052238613367}, 'num_steps_trained': 928800, 'update_time_ms': 2.67}",774,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.96797442436218,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,52800,928800,{},44,287,-12.10505425469865,2025-09-05_01-06-52,4.0019870945357,3651946,1757027212,3.7305935253341027,31373.120370149612,71474,4.174216027874564
+cda-server-2,False,2125.933746099472,"{'sample_time_ms': 46769.426, 'num_steps_sampled': 930000, 'grad_time_ms': 371.785, 'load_time_ms': 0.686, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 1.8429656028747559, 'cur_kl_coeff': 1.708593726158142, 'policy_loss': -0.04755048826336861, 'vf_explained_var': 0.7892983555793762, 'entropy': 1.7938878536224365, 'total_loss': 1.809276819229126, 'kl': 0.008112970739603043}, 'num_steps_trained': 930000, 'update_time_ms': 2.697}",775,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.12655162811279,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,54000,930000,{},45,294,-13.78541047688628,2025-09-05_01-07-39,4.064681068362106,3651946,1757027259,3.865382094416491,31420.246921777725,71768,4.085034013605442
+cda-server-2,False,2172.9108469486237,"{'sample_time_ms': 46763.129, 'num_steps_sampled': 931200, 'grad_time_ms': 371.872, 'load_time_ms': 0.687, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 4.4395365715026855, 'cur_kl_coeff': 1.708593726158142, 'policy_loss': -0.04413954168558121, 'vf_explained_var': 0.6769609451293945, 'entropy': 1.8190557956695557, 'total_loss': 4.407191276550293, 'kl': 0.0069029685109853745}, 'num_steps_trained': 931200, 'update_time_ms': 2.688}",776,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.97710084915161,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,55200,931200,{},46,291,-23.977696050912584,2025-09-05_01-08-26,8.000033070321237,3651946,1757027306,3.798016183340947,31467.224022626877,72059,4.130584192439863
+cda-server-2,False,2219.4841015338898,"{'sample_time_ms': 46701.596, 'num_steps_sampled': 932400, 'grad_time_ms': 371.708, 'load_time_ms': 0.674, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 2.954724073410034, 'cur_kl_coeff': 1.708593726158142, 'policy_loss': -0.05634055659174919, 'vf_explained_var': 0.6867001056671143, 'entropy': 2.0110385417938232, 'total_loss': 2.9130592346191406, 'kl': 0.008589200675487518}, 'num_steps_trained': 932400, 'update_time_ms': 2.641}",777,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.57325458526611,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,56400,932400,{},47,286,-12.047988655655782,2025-09-05_01-09-13,4.350972925973858,3651946,1757027353,3.6947120059361707,31513.797277212143,72345,4.195804195804196
+cda-server-2,False,2266.5658695697784,"{'sample_time_ms': 46645.782, 'num_steps_sampled': 933600, 'grad_time_ms': 371.628, 'load_time_ms': 0.675, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 2.6642534732818604, 'cur_kl_coeff': 1.708593726158142, 'policy_loss': -0.04670833796262741, 'vf_explained_var': 0.6989669799804688, 'entropy': 1.9170633554458618, 'total_loss': 2.63336181640625, 'kl': 0.009257161058485508}, 'num_steps_trained': 933600, 'update_time_ms': 2.621}",778,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.08176803588867,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,57600,933600,{},48,289,-10.348640951962327,2025-09-05_01-10-00,4.002221755706053,3651946,1757027400,3.7923208806491115,31560.87904524803,72634,4.141868512110727
+cda-server-2,False,2313.4038367271423,"{'sample_time_ms': 46539.462, 'num_steps_sampled': 934800, 'grad_time_ms': 372.071, 'load_time_ms': 0.675, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.362484335899353, 'cur_kl_coeff': 1.708593726158142, 'policy_loss': -0.062406666576862335, 'vf_explained_var': 0.9415891170501709, 'entropy': 1.7499221563339233, 'total_loss': 0.3161599040031433, 'kl': 0.009412539191544056}, 'num_steps_trained': 934800, 'update_time_ms': 2.638}",779,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.83796715736389,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,58800,934800,{},49,296,-5.06328498551769,2025-09-05_01-10-47,4.117140791597043,3651946,1757027447,3.9165722485046945,31607.717012405396,72930,4.054054054054054
+cda-server-2,False,2361.142077922821,"{'sample_time_ms': 46600.847, 'num_steps_sampled': 936000, 'grad_time_ms': 373.36, 'load_time_ms': 0.676, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 1.1525019407272339, 'cur_kl_coeff': 1.708593726158142, 'policy_loss': -0.045695994049310684, 'vf_explained_var': 0.8327200412750244, 'entropy': 1.7713196277618408, 'total_loss': 1.1177312135696411, 'kl': 0.006394288036972284}, 'num_steps_trained': 936000, 'update_time_ms': 2.63}",780,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.73824119567871,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,60000,936000,{},50,295,-8.59429343910615,2025-09-05_01-11-35,4.154023467959979,3651946,1757027495,3.8926968020858577,31655.455253601074,73225,4.071186440677966
+cda-server-2,False,2408.5858132839203,"{'sample_time_ms': 46636.516, 'num_steps_sampled': 937200, 'grad_time_ms': 373.411, 'load_time_ms': 0.699, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 1.8505889177322388, 'cur_kl_coeff': 1.708593726158142, 'policy_loss': -0.06169609725475311, 'vf_explained_var': 0.7730779647827148, 'entropy': 1.8604083061218262, 'total_loss': 1.8047436475753784, 'kl': 0.009277136996388435}, 'num_steps_trained': 937200, 'update_time_ms': 2.647}",781,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.44373536109924,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,61200,937200,{},51,290,-10.083785416447018,2025-09-05_01-12-22,4.392711964661583,3651946,1757027542,3.7665839765890223,31702.898988962173,73515,4.1482758620689655
+cda-server-2,False,2455.3863422870636,"{'sample_time_ms': 46671.265, 'num_steps_sampled': 938400, 'grad_time_ms': 373.716, 'load_time_ms': 0.697, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 3.5524978637695312, 'cur_kl_coeff': 1.708593726158142, 'policy_loss': -0.05039419233798981, 'vf_explained_var': 0.7071776986122131, 'entropy': 1.9242134094238281, 'total_loss': 3.5340025424957275, 'kl': 0.018669771030545235}, 'num_steps_trained': 938400, 'update_time_ms': 2.656}",782,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.80052900314331,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,62400,938400,{},52,285,-18.5029933956362,2025-09-05_01-13-09,4.001542615606032,3651946,1757027589,3.686333737405312,31749.699517965317,73800,4.2
+cda-server-2,False,2502.4582917690277,"{'sample_time_ms': 46678.74, 'num_steps_sampled': 939600, 'grad_time_ms': 373.655, 'load_time_ms': 0.709, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 5.03559684753418, 'cur_kl_coeff': 1.708593726158142, 'policy_loss': -0.06031392142176628, 'vf_explained_var': 0.5796419382095337, 'entropy': 2.035823345184326, 'total_loss': 4.999176502227783, 'kl': 0.01398418378084898}, 'num_steps_trained': 939600, 'update_time_ms': 2.653}",783,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.07194948196411,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,63600,939600,{},53,284,-21.840290534614667,2025-09-05_01-13-56,4.001584936178242,3651946,1757027636,3.641597409847955,31796.77146744728,74084,4.235915492957746
+cda-server-2,False,2551.2730057239532,"{'sample_time_ms': 46864.77, 'num_steps_sampled': 940800, 'grad_time_ms': 372.387, 'load_time_ms': 0.72, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 4.303224563598633, 'cur_kl_coeff': 1.708593726158142, 'policy_loss': -0.05835850536823273, 'vf_explained_var': 0.606787919998169, 'entropy': 1.9617383480072021, 'total_loss': 4.258739471435547, 'kl': 0.008119616657495499}, 'num_steps_trained': 940800, 'update_time_ms': 2.574}",784,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",48.81471395492554,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,64800,940800,{},54,285,-15.377537604738691,2025-09-05_01-14-45,4.020792989221064,3651946,1757027685,3.663630381865669,31845.586181402206,74369,4.2105263157894735
+cda-server-2,False,2598.3692677021027,"{'sample_time_ms': 46859.844, 'num_steps_sampled': 942000, 'grad_time_ms': 374.344, 'load_time_ms': 0.727, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.6878752112388611, 'cur_kl_coeff': 1.708593726158142, 'policy_loss': -0.05567542091012001, 'vf_explained_var': 0.8880141973495483, 'entropy': 1.6891648769378662, 'total_loss': 0.6541071534156799, 'kl': 0.012821835465729237}, 'num_steps_trained': 942000, 'update_time_ms': 2.526}",785,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.096261978149414,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,66000,942000,{},55,295,-2.1602375685593795,2025-09-05_01-15-32,8.000052124494673,3651946,1757027732,3.9152287397209276,31892.682443380356,74664,4.061016949152543
+cda-server-2,False,2645.4723284244537,"{'sample_time_ms': 46872.238, 'num_steps_sampled': 943200, 'grad_time_ms': 374.543, 'load_time_ms': 0.734, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 3.5516011714935303, 'cur_kl_coeff': 1.708593726158142, 'policy_loss': -0.05592495948076248, 'vf_explained_var': 0.6857667565345764, 'entropy': 1.9146186113357544, 'total_loss': 3.513665199279785, 'kl': 0.01052860077470541}, 'num_steps_trained': 943200, 'update_time_ms': 2.539}",786,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.103060722351074,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,67200,943200,{},56,288,-17.055190062858692,2025-09-05_01-16-19,4.151147511426473,3651946,1757027779,3.742135272957436,31939.785504102707,74952,4.163194444444445
+cda-server-2,False,2692.3276150226593,"{'sample_time_ms': 46897.224, 'num_steps_sampled': 944400, 'grad_time_ms': 377.628, 'load_time_ms': 0.753, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 9.774484634399414, 'cur_kl_coeff': 1.708593726158142, 'policy_loss': -0.07012740522623062, 'vf_explained_var': 0.5126588344573975, 'entropy': 2.1517908573150635, 'total_loss': 9.721240043640137, 'kl': 0.009880815632641315}, 'num_steps_trained': 944400, 'update_time_ms': 2.559}",787,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.855286598205566,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,68400,944400,{},57,278,-32.373501323479616,2025-09-05_01-17-06,8.000032734282158,3651946,1757027826,3.523573781630109,31986.640790700912,75230,4.302158273381295
+cda-server-2,False,2739.5533571243286,"{'sample_time_ms': 46910.383, 'num_steps_sampled': 945600, 'grad_time_ms': 378.833, 'load_time_ms': 0.75, 'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 2.4646711349487305, 'cur_kl_coeff': 1.708593726158142, 'policy_loss': -0.06321736425161362, 'vf_explained_var': 0.7288916707038879, 'entropy': 1.8632335662841797, 'total_loss': 2.421541929244995, 'kl': 0.011756868101656437}, 'num_steps_trained': 945600, 'update_time_ms': 2.588}",788,"{'clip_actions': True, 'vf_share_layers': False, 'sgd_minibatch_size': 128, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'custom_resources_per_worker': {}, 'callbacks': {'on_sample_end': None, 'on_train_result': None, 'on_episode_start': None, 'on_episode_end': None, 'on_episode_step': None}, 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'use_gae': True, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'entropy_coeff': 0.0, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'input': 'sampler', 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'clip_rewards': None, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'lr_schedule': None, 'kl_coeff': 0.2, 'straggler_mitigation': False, 'tf_session_args': {'device_count': {'CPU': 1}, 'inter_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'intra_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'output': None, 'vf_loss_coeff': 1.0, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 'optimizer': {}, 
'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'output_max_file_size': 67108864, 'num_cpus_for_driver': 1, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.22574210166931,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,69600,945600,{},58,291,-11.991513132469333,2025-09-05_01-17-53,8.00001788597729,3651946,1757027873,3.782112459354446,32033.86653280258,75521,4.140893470790378
+cda-server-2,False,51.66564321517944,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.9922509789466858, 'cur_kl_coeff': 1.708593726158142, 'policy_loss': -0.054242223501205444, 'vf_explained_var': 0.8546984195709229, 'entropy': 1.8489820957183838, 'total_loss': 0.9491172432899475, 'kl': 0.006501571275293827}, 'sample_time_ms': 50597.815, 'num_steps_sampled': 937200, 'grad_time_ms': 647.607, 'num_steps_trained': 937200, 'load_time_ms': 30.291, 'update_time_ms': 368.902}",781,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",51.66564321517944,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,1200,937200,{},1,291,-14.976873928364402,2025-09-05_01-19-03,4.002509303514523,3651949,1757027943,3.8545046042374915,31707.120896816254,73516,4.092783505154639
+cda-server-2,False,99.1927056312561,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 2.9489707946777344, 'cur_kl_coeff': 0.20000000298023224, 'policy_loss': -0.06298628449440002, 'vf_explained_var': 0.6915463805198669, 'entropy': 1.8564648628234863, 'total_loss': 2.893836259841919, 'kl': 0.039258651435375214}, 'sample_time_ms': 48865.899, 'num_steps_sampled': 938400, 'grad_time_ms': 515.502, 'num_steps_trained': 938400, 'load_time_ms': 15.518, 'update_time_ms': 185.758}",782,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.52706241607666,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,2400,938400,{},2,288,-12.264256535839294,2025-09-05_01-19-51,8.000036670659627,3651949,1757027991,3.718140981724725,31754.64795923233,73804,4.177083333333333
+cda-server-2,False,146.26294922828674,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 1.8496922254562378, 'cur_kl_coeff': 0.30000001192092896, 'policy_loss': -0.07667955011129379, 'vf_explained_var': 0.7851062417030334, 'entropy': 1.8610098361968994, 'total_loss': 1.7795567512512207, 'kl': 0.021813293918967247}, 'sample_time_ms': 48139.086, 'num_steps_sampled': 939600, 'grad_time_ms': 468.525, 'num_steps_trained': 939600, 'load_time_ms': 10.566, 'update_time_ms': 124.756}",783,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.07024359703064,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,3600,939600,{},3,290,-10.064511670467962,2025-09-05_01-20-38,4.026854004253584,3651949,1757028038,3.776355211402988,31801.71820282936,74094,4.1482758620689655
+cda-server-2,False,193.2772831916809,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 1.7610725164413452, 'cur_kl_coeff': 0.44999995827674866, 'policy_loss': -0.0601881742477417, 'vf_explained_var': 0.8007305860519409, 'entropy': 1.8276609182357788, 'total_loss': 1.7076600790023804, 'kl': 0.015057351440191269}, 'sample_time_ms': 47765.869, 'num_steps_sampled': 940800, 'grad_time_ms': 441.014, 'num_steps_trained': 940800, 'load_time_ms': 8.085, 'update_time_ms': 94.317}",784,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.014333963394165,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,4800,940800,{},4,292,-13.549796048554573,2025-09-05_01-21-25,4.006583956578801,3651949,1757028085,3.838442210095396,31848.732536792755,74386,4.113013698630137
+cda-server-2,False,242.63713192939758,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 1.4205937385559082, 'cur_kl_coeff': 0.44999995827674866, 'policy_loss': -0.050313714891672134, 'vf_explained_var': 0.8278232216835022, 'entropy': 1.7518596649169922, 'total_loss': 1.3765041828155518, 'kl': 0.013831070624291897}, 'sample_time_ms': 48007.749, 'num_steps_sampled': 942000, 'grad_time_ms': 427.871, 'num_steps_trained': 942000, 'load_time_ms': 6.591, 'update_time_ms': 75.96}",785,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",49.359848737716675,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,6000,942000,{},5,294,-13.06073049907241,2025-09-05_01-22-14,4.128189074343364,3651949,1757028134,3.9022232455595676,31898.09238553047,74680,4.068027210884353
+cda-server-2,False,289.53229904174805,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.4408969283103943, 'cur_kl_coeff': 0.44999995827674866, 'policy_loss': -0.05928817018866539, 'vf_explained_var': 0.926021158695221, 'entropy': 1.74524986743927, 'total_loss': 0.3913138806819916, 'kl': 0.021566830575466156}, 'sample_time_ms': 47760.908, 'num_steps_sampled': 943200, 'grad_time_ms': 416.361, 'num_steps_trained': 943200, 'load_time_ms': 5.599, 'update_time_ms': 63.697}",786,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.895167112350464,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,7200,943200,{},6,298,-2.1567221200411755,2025-09-05_01-23-01,8.000010802354078,3651949,1757028181,3.96651188022375,31944.987552642822,74978,4.030201342281879
+cda-server-2,False,336.5793924331665,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.22498305141925812, 'cur_kl_coeff': 0.675000011920929, 'policy_loss': -0.042830690741539, 'vf_explained_var': 0.9598996639251709, 'entropy': 1.6507837772369385, 'total_loss': 0.19249561429023743, 'kl': 0.015323377214372158}, 'sample_time_ms': 47606.235, 'num_steps_sampled': 944400, 'grad_time_ms': 408.122, 'num_steps_trained': 944400, 'load_time_ms': 4.891, 'update_time_ms': 54.96}",787,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.04709339141846,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,8400,944400,{},7,298,-2.024536533868339,2025-09-05_01-23-48,4.124295920144162,3651949,1757028228,3.9601303116462363,31992.03464603424,75276,4.026845637583893
+cda-server-2,False,383.75608229637146,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 2.5072805881500244, 'cur_kl_coeff': 0.675000011920929, 'policy_loss': -0.0695737674832344, 'vf_explained_var': 0.7216205596923828, 'entropy': 1.8538966178894043, 'total_loss': 2.451732635498047, 'kl': 0.020778659731149673}, 'sample_time_ms': 47503.384, 'num_steps_sampled': 945600, 'grad_time_ms': 405.125, 'num_steps_trained': 945600, 'load_time_ms': 4.36, 'update_time_ms': 48.391}",788,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 
'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.176689863204956,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,9600,945600,{},8,289,-12.91145736728022,2025-09-05_01-24-36,4.0205117253454805,3651949,1757028276,3.752205695488996,32039.211335897446,75565,4.155709342560554
+cda-server-2,False,430.90940523147583,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 5.012481689453125, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.059946220368146896, 'vf_explained_var': 0.6565014719963074, 'entropy': 1.9285272359848022, 'total_loss': 4.964396953582764, 'kl': 0.011715345084667206}, 'sample_time_ms': 47422.232, 'num_steps_sampled': 946800, 'grad_time_ms': 401.341, 'num_steps_trained': 946800, 'load_time_ms': 3.953, 'update_time_ms': 43.302}",789,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.15332293510437,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,10800,946800,{},9,289,-25.47250893244872,2025-09-05_01-25-23,4.12287126366258,3651949,1757028323,3.7499060035607386,32086.36465883255,75854,4.1522491349480966
+cda-server-2,False,478.20979285240173,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 2.250943183898926, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.06416141986846924, 'vf_explained_var': 0.7748057842254639, 'entropy': 1.8672207593917847, 'total_loss': 2.2019548416137695, 'kl': 0.014985635876655579}, 'sample_time_ms': 47370.318, 'num_steps_sampled': 948000, 'grad_time_ms': 399.985, 'num_steps_trained': 948000, 'load_time_ms': 3.62, 'update_time_ms': 39.226}",790,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.3003876209259,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,12000,948000,{},10,290,-14.061992362304128,2025-09-05_01-26-10,8.000028237344964,3651949,1757028370,3.8105384396577486,32133.665046453476,76144,4.127586206896551
+cda-server-2,False,525.4595472812653,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.9491912126541138, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.049380749464035034, 'vf_explained_var': 0.8589465618133545, 'entropy': 1.8180402517318726, 'total_loss': 0.9103296995162964, 'kl': 0.010389466769993305}, 'sample_time_ms': 46996.824, 'num_steps_sampled': 949200, 'grad_time_ms': 372.868, 'num_steps_trained': 949200, 'load_time_ms': 0.663, 'update_time_ms': 2.622}",791,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.249754428863525,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,13200,949200,{},11,294,-5.406530040356589,2025-09-05_01-26-57,4.094243137303819,3651949,1757028417,3.879864113613836,32180.91480088234,76438,4.081632653061225
+cda-server-2,False,572.4544923305511,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.4377812147140503, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.06339593976736069, 'vf_explained_var': 0.9286383390426636, 'entropy': 1.7569670677185059, 'total_loss': 0.3850562870502472, 'kl': 0.010539311915636063}, 'sample_time_ms': 46945.37, 'num_steps_sampled': 950400, 'grad_time_ms': 371.132, 'num_steps_trained': 950400, 'load_time_ms': 0.656, 'update_time_ms': 2.657}",792,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.99494504928589,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,14400,950400,{},12,296,-4.968387219343455,2025-09-05_01-27-44,8.000353116834368,3651949,1757028464,3.928256344203121,32227.909745931625,76734,4.054054054054054
+cda-server-2,False,619.7707614898682,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 6.112908840179443, 'cur_kl_coeff': 1.0125000476837158, 'policy_loss': -0.04459194839000702, 'vf_explained_var': 0.5795450806617737, 'entropy': 1.667588710784912, 'total_loss': 6.097717761993408, 'kl': 0.0290378425270319}, 'sample_time_ms': 46971.848, 'num_steps_sampled': 951600, 'grad_time_ms': 369.421, 'num_steps_trained': 951600, 'load_time_ms': 0.652, 'update_time_ms': 2.608}",793,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 
'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.31626915931702,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,15600,951600,{},13,291,-24.589137243397587,2025-09-05_01-28-32,4.036547243347274,3651949,1757028512,3.88809978551038,32275.226015090942,77025,4.072164948453608
+cda-server-2,False,666.9650793075562,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 1.718286395072937, 'cur_kl_coeff': 1.5187499523162842, 'policy_loss': -0.06419695913791656, 'vf_explained_var': 0.7822743654251099, 'entropy': 1.86990487575531, 'total_loss': 1.6808961629867554, 'kl': 0.01765047013759613}, 'sample_time_ms': 46988.306, 'num_steps_sampled': 952800, 'grad_time_ms': 371.019, 'num_steps_trained': 952800, 'load_time_ms': 0.648, 'update_time_ms': 2.552}",794,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 
'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.19431781768799,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,16800,952800,{},14,289,-12.701008803173178,2025-09-05_01-29-19,4.203500124253747,3651949,1757028559,3.676192982592369,32322.42033290863,77314,4.211072664359862
+cda-server-2,False,713.8903295993805,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 4.264537334442139, 'cur_kl_coeff': 1.5187499523162842, 'policy_loss': -0.04881961643695831, 'vf_explained_var': 0.6670060157775879, 'entropy': 1.85612952709198, 'total_loss': 4.239828109741211, 'kl': 0.01587512157857418}, 'sample_time_ms': 46743.49, 'num_steps_sampled': 954000, 'grad_time_ms': 372.268, 'num_steps_trained': 954000, 'load_time_ms': 0.665, 'update_time_ms': 2.582}",795,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 
'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.92525029182434,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,18000,954000,{},15,288,-12.980148394396917,2025-09-05_01-30-06,4.065503414672037,3651949,1757028606,3.7922930521983074,32369.345583200455,77602,4.131944444444445
+cda-server-2,False,761.1560180187225,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.30072930455207825, 'cur_kl_coeff': 1.5187499523162842, 'policy_loss': -0.05964440107345581, 'vf_explained_var': 0.9533084034919739, 'entropy': 1.6481891870498657, 'total_loss': 0.2712497115135193, 'kl': 0.019861610606312752}, 'sample_time_ms': 46778.854, 'num_steps_sampled': 955200, 'grad_time_ms': 373.989, 'num_steps_trained': 955200, 'load_time_ms': 0.665, 'update_time_ms': 2.611}",796,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.26568841934204,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,19200,955200,{},16,298,-9.968730862637617,2025-09-05_01-30-53,4.0024867475246,3651949,1757028653,3.8975311680485905,32416.611271619797,77900,4.067114093959732
+cda-server-2,False,808.0145020484924,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.8040688633918762, 'cur_kl_coeff': 1.5187499523162842, 'policy_loss': -0.032848622649908066, 'vf_explained_var': 0.915114164352417, 'entropy': 1.5866845846176147, 'total_loss': 0.7948029637336731, 'kl': 0.01552779134362936}, 'sample_time_ms': 46760.331, 'num_steps_sampled': 956400, 'grad_time_ms': 373.74, 'num_steps_trained': 956400, 'load_time_ms': 0.662, 'update_time_ms': 2.629}",797,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 
'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.8584840297699,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,20400,956400,{},17,297,-13.755648820316434,2025-09-05_01-31-40,4.001205095622689,3651949,1757028700,3.9404580151675765,32463.469755649567,78197,4.040404040404041
+cda-server-2,False,855.3551073074341,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 1.7433048486709595, 'cur_kl_coeff': 1.5187499523162842, 'policy_loss': -0.03473437577486038, 'vf_explained_var': 0.8053309321403503, 'entropy': 1.6424217224121094, 'total_loss': 1.7213722467422485, 'kl': 0.008429242298007011}, 'sample_time_ms': 46777.43, 'num_steps_sampled': 957600, 'grad_time_ms': 373.041, 'num_steps_trained': 957600, 'load_time_ms': 0.671, 'update_time_ms': 2.614}",798,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.34060525894165,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,21600,957600,{},18,296,-15.330119717390737,2025-09-05_01-32-27,8.000000401861136,3651949,1757028747,3.9213278219855128,32510.81036090851,78493,4.050675675675675
+cda-server-2,False,902.4068982601166,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 3.922396421432495, 'cur_kl_coeff': 1.5187499523162842, 'policy_loss': -0.045828308910131454, 'vf_explained_var': 0.6741072535514832, 'entropy': 1.671573281288147, 'total_loss': 3.9039247035980225, 'kl': 0.018012363463640213}, 'sample_time_ms': 46768.543, 'num_steps_sampled': 958800, 'grad_time_ms': 371.807, 'num_steps_trained': 958800, 'load_time_ms': 0.666, 'update_time_ms': 2.59}",799,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 
'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.051790952682495,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,22800,958800,{},19,292,-23.708678122556933,2025-09-05_01-33-14,4.001030656455413,3651949,1757028794,3.8258956724124005,32557.86215186119,78785,4.109589041095891
+cda-server-2,False,949.1350507736206,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 1.8681204319000244, 'cur_kl_coeff': 1.5187499523162842, 'policy_loss': -0.06701643019914627, 'vf_explained_var': 0.7929180264472961, 'entropy': 1.9429833889007568, 'total_loss': 1.827116847038269, 'kl': 0.017127802595496178}, 'sample_time_ms': 46714.402, 'num_steps_sampled': 960000, 'grad_time_ms': 368.803, 'num_steps_trained': 960000, 'load_time_ms': 0.678, 'update_time_ms': 2.564}",800,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.72815251350403,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,24000,960000,{},20,288,-12.98841631792866,2025-09-05_01-34-01,4.002175431387786,3651949,1757028841,3.7431750070060352,32604.590304374695,79073,4.163194444444445
+cda-server-2,False,997.0902171134949,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.3580002188682556, 'cur_kl_coeff': 1.5187499523162842, 'policy_loss': -0.05703965947031975, 'vf_explained_var': 0.9418891072273254, 'entropy': 1.6511077880859375, 'total_loss': 0.31548604369163513, 'kl': 0.009564097970724106}, 'sample_time_ms': 46785.388, 'num_steps_sampled': 961200, 'grad_time_ms': 368.434, 'num_steps_trained': 961200, 'load_time_ms': 0.676, 'update_time_ms': 2.57}",801,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.95516633987427,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,25200,961200,{},21,297,-4.085459592416555,2025-09-05_01-34-49,4.162939948548514,3651949,1757028889,3.9406939439213287,32652.54547071457,79370,4.040404040404041
+cda-server-2,False,1044.357929944992,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.18730320036411285, 'cur_kl_coeff': 1.5187499523162842, 'policy_loss': -0.04268259555101395, 'vf_explained_var': 0.9681369066238403, 'entropy': 1.6600903272628784, 'total_loss': 0.15792995691299438, 'kl': 0.00876335147768259}, 'sample_time_ms': 46811.914, 'num_steps_sampled': 962400, 'grad_time_ms': 369.199, 'num_steps_trained': 962400, 'load_time_ms': 0.679, 'update_time_ms': 2.536}",802,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.26771283149719,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,26400,962400,{},22,299,-2.0613225091415845,2025-09-05_01-35-36,4.02449057563354,3651949,1757028936,3.980055908652997,32699.813183546066,79669,4.013377926421405
+cda-server-2,False,1091.4220464229584,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.708747923374176, 'cur_kl_coeff': 1.5187499523162842, 'policy_loss': -0.05166047438979149, 'vf_explained_var': 0.8876267671585083, 'entropy': 1.7017515897750854, 'total_loss': 0.6666974425315857, 'kl': 0.006327613722532988}, 'sample_time_ms': 46784.316, 'num_steps_sampled': 963600, 'grad_time_ms': 371.494, 'num_steps_trained': 963600, 'load_time_ms': 0.695, 'update_time_ms': 2.546}",803,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.06411647796631,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,27600,963600,{},23,297,-3.7565080186063238,2025-09-05_01-36-24,8.000000616166519,3651949,1757028984,3.9177281391629033,32746.877300024033,79966,4.053872053872054
+cda-server-2,False,1139.3512353897095,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.6867063045501709, 'cur_kl_coeff': 1.5187499523162842, 'policy_loss': -0.037311896681785583, 'vf_explained_var': 0.903723418712616, 'entropy': 1.8005733489990234, 'total_loss': 0.6543222069740295, 'kl': 0.003244699677452445}, 'sample_time_ms': 46857.686, 'num_steps_sampled': 964800, 'grad_time_ms': 371.531, 'num_steps_trained': 964800, 'load_time_ms': 0.698, 'update_time_ms': 2.574}",804,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.9291889667511,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,28800,964800,{},24,295,-5.504860003203149,2025-09-05_01-37-11,4.130190802436839,3651949,1757029031,3.928988667431524,32794.806488990784,80261,4.047457627118644
+cda-server-2,False,1186.4974427223206,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.7119202017784119, 'cur_kl_coeff': 0.7593749761581421, 'policy_loss': -0.050469715148210526, 'vf_explained_var': 0.8853400945663452, 'entropy': 1.776171326637268, 'total_loss': 0.6910502910614014, 'kl': 0.03897910937666893}, 'sample_time_ms': 46880.225, 'num_steps_sampled': 966000, 'grad_time_ms': 371.179, 'num_steps_trained': 966000, 'load_time_ms': 0.682, 'update_time_ms': 2.557}",805,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.146207332611084,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,30000,966000,{},25,296,-6.568379425077559,2025-09-05_01-37-59,8.000022452656822,3651949,1757029079,3.897963507153022,32841.952696323395,80557,4.070945945945946
+cda-server-2,False,1235.047001838684,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 1.4876036643981934, 'cur_kl_coeff': 1.139062523841858, 'policy_loss': -0.05569356679916382, 'vf_explained_var': 0.7973092794418335, 'entropy': 1.8454415798187256, 'total_loss': 1.4398431777954102, 'kl': 0.006964581087231636}, 'sample_time_ms': 47008.397, 'num_steps_sampled': 967200, 'grad_time_ms': 371.409, 'num_steps_trained': 967200, 'load_time_ms': 0.676, 'update_time_ms': 2.527}",806,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",48.549559116363525,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,31200,967200,{},26,290,-8.652763661169601,2025-09-05_01-38-47,4.016446949720436,3651949,1757029127,3.8287529921794836,32890.50225543976,80847,4.113793103448276
+cda-server-2,False,1281.8840281963348,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 6.264103412628174, 'cur_kl_coeff': 1.139062523841858, 'policy_loss': -0.05899304896593094, 'vf_explained_var': 0.5897148251533508, 'entropy': 2.0083160400390625, 'total_loss': 6.223005294799805, 'kl': 0.015710312873125076}, 'sample_time_ms': 47005.169, 'num_steps_sampled': 968400, 'grad_time_ms': 372.504, 'num_steps_trained': 968400, 'load_time_ms': 0.689, 'update_time_ms': 2.518}",807,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 
'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.83702635765076,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,32400,968400,{},27,287,-21.911524032019354,2025-09-05_01-39-34,4.7794433488924115,3651949,1757029174,3.695486852089204,32937.33928179741,81134,4.195121951219512
+cda-server-2,False,1329.7248368263245,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 1.4413554668426514, 'cur_kl_coeff': 1.139062523841858, 'policy_loss': -0.05914253741502762, 'vf_explained_var': 0.8244356513023376, 'entropy': 1.827782154083252, 'total_loss': 1.394775390625, 'kl': 0.011028682813048363}, 'sample_time_ms': 47057.533, 'num_steps_sampled': 969600, 'grad_time_ms': 370.213, 'num_steps_trained': 969600, 'load_time_ms': 0.676, 'update_time_ms': 2.515}",808,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 
'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.840808629989624,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,33600,969600,{},28,292,-9.395980129400694,2025-09-05_01-40-22,4.1590783756023155,3651949,1757029222,3.8544892844564207,32985.1800904274,81426,4.102739726027397
+cda-server-2,False,1377.2052764892578,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.7015355825424194, 'cur_kl_coeff': 1.139062523841858, 'policy_loss': -0.04539366811513901, 'vf_explained_var': 0.9010327458381653, 'entropy': 1.6855577230453491, 'total_loss': 0.6637807488441467, 'kl': 0.006706247106194496}, 'sample_time_ms': 47097.643, 'num_steps_sampled': 970800, 'grad_time_ms': 372.945, 'num_steps_trained': 970800, 'load_time_ms': 0.677, 'update_time_ms': 2.511}",809,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.48043966293335,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,34800,970800,{},29,295,-7.3428326232683965,2025-09-05_01-41-09,4.413765057282951,3651949,1757029269,3.9358162676219055,33032.66053009033,81721,4.047457627118644
+cda-server-2,False,1424.1667184829712,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 1.8058637380599976, 'cur_kl_coeff': 1.139062523841858, 'policy_loss': -0.04731789603829384, 'vf_explained_var': 0.7754788994789124, 'entropy': 1.6680302619934082, 'total_loss': 1.7740479707717896, 'kl': 0.01360949594527483}, 'sample_time_ms': 47119.038, 'num_steps_sampled': 972000, 'grad_time_ms': 374.771, 'num_steps_trained': 972000, 'load_time_ms': 0.667, 'update_time_ms': 2.536}",810,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.96144199371338,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,36000,972000,{},30,295,-10.516404672326125,2025-09-05_01-41-56,8.000012660669258,3651949,1757029316,3.847911428426083,33079.621972084045,82016,4.098305084745762
+cda-server-2,False,1471.9550507068634,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.7151005268096924, 'cur_kl_coeff': 1.139062523841858, 'policy_loss': -0.050695180892944336, 'vf_explained_var': 0.8930562138557434, 'entropy': 1.7088598012924194, 'total_loss': 0.6887589693069458, 'kl': 0.021380571648478508}, 'sample_time_ms': 47103.509, 'num_steps_sampled': 973200, 'grad_time_ms': 373.547, 'num_steps_trained': 973200, 'load_time_ms': 0.668, 'update_time_ms': 2.597}",811,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.78833222389221,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,37200,973200,{},31,295,-6.557950140927627,2025-09-05_01-42-44,8.000019141972288,3651949,1757029364,3.9000041977097872,33127.41030430794,82311,4.064406779661017
+cda-server-2,False,1518.968185186386,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 3.846207618713379, 'cur_kl_coeff': 1.708593726158142, 'policy_loss': -0.0323285274207592, 'vf_explained_var': 0.6994062662124634, 'entropy': 1.7498440742492676, 'total_loss': 3.844479560852051, 'kl': 0.017909592017531395}, 'sample_time_ms': 47078.915, 'num_steps_sampled': 974400, 'grad_time_ms': 372.627, 'num_steps_trained': 974400, 'load_time_ms': 0.665, 'update_time_ms': 2.581}",812,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 
'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.013134479522705,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,38400,974400,{},32,292,-20.47005127444236,2025-09-05_01-43-31,4.162667764236382,3651949,1757029411,3.8325548653714554,33174.42343878746,82603,4.109589041095891
+cda-server-2,False,1566.1076924800873,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 2.445774793624878, 'cur_kl_coeff': 1.708593726158142, 'policy_loss': -0.045447684824466705, 'vf_explained_var': 0.7744918465614319, 'entropy': 1.6828818321228027, 'total_loss': 2.407973527908325, 'kl': 0.004475479479879141}, 'sample_time_ms': 47087.186, 'num_steps_sampled': 975600, 'grad_time_ms': 371.87, 'num_steps_trained': 975600, 'load_time_ms': 0.655, 'update_time_ms': 2.651}",813,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 
'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.13950729370117,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,39600,975600,{},33,294,-20.252988802667094,2025-09-05_01-44-18,4.026454987915971,3651949,1757029458,3.8777027598765352,33221.56294608116,82897,4.081632653061225
+cda-server-2,False,1613.4467389583588,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.47300609946250916, 'cur_kl_coeff': 0.854296863079071, 'policy_loss': -0.057291433215141296, 'vf_explained_var': 0.9230258464813232, 'entropy': 1.5881778001785278, 'total_loss': 0.4345957636833191, 'kl': 0.022101333364844322}, 'sample_time_ms': 47027.47, 'num_steps_sampled': 976800, 'grad_time_ms': 372.629, 'num_steps_trained': 976800, 'load_time_ms': 0.666, 'update_time_ms': 2.621}",814,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.339046478271484,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,40800,976800,{},34,297,-4.0787353271623665,2025-09-05_01-45-06,4.00314741677162,3651949,1757029506,3.9363969553082097,33268.90199255943,83194,4.043771043771044
+cda-server-2,False,1660.20107960701,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 2.3579092025756836, 'cur_kl_coeff': 1.2814452648162842, 'policy_loss': -0.0439508855342865, 'vf_explained_var': 0.752430260181427, 'entropy': 1.8941396474838257, 'total_loss': 2.323559522628784, 'kl': 0.0074925231747329235}, 'sample_time_ms': 46988.425, 'num_steps_sampled': 978000, 'grad_time_ms': 372.503, 'num_steps_trained': 978000, 'load_time_ms': 0.67, 'update_time_ms': 2.615}",815,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 
'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.75434064865112,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,42000,978000,{},35,291,-15.850401724105637,2025-09-05_01-45-53,4.295421785432054,3651949,1757029553,3.8307856738404347,33315.656333208084,83485,4.11340206185567
+cda-server-2,False,1707.0376374721527,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.8707294464111328, 'cur_kl_coeff': 1.2814452648162842, 'policy_loss': -0.040706999599933624, 'vf_explained_var': 0.8828989863395691, 'entropy': 1.745435357093811, 'total_loss': 0.8379433155059814, 'kl': 0.006181230768561363}, 'sample_time_ms': 46817.656, 'num_steps_sampled': 979200, 'grad_time_ms': 371.985, 'num_steps_trained': 979200, 'load_time_ms': 0.681, 'update_time_ms': 2.609}",816,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.83655786514282,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,43200,979200,{},36,294,-6.977797278917819,2025-09-05_01-46-39,6.014182721436078,3651949,1757029599,3.8778004777755335,33362.49289107323,83779,4.078231292517007
+cda-server-2,False,1754.92271900177,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.2831428647041321, 'cur_kl_coeff': 1.2814452648162842, 'policy_loss': -0.0387619249522686, 'vf_explained_var': 0.950989305973053, 'entropy': 1.6422532796859741, 'total_loss': 0.2551690340042114, 'kl': 0.008418700657784939}, 'sample_time_ms': 46920.809, 'num_steps_sampled': 980400, 'grad_time_ms': 373.585, 'num_steps_trained': 980400, 'load_time_ms': 0.686, 'update_time_ms': 2.596}",817,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 
'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.88508152961731,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,44400,980400,{},37,297,-2.094870018555298,2025-09-05_01-47-27,4.004102837999562,3651949,1757029647,3.934081516235194,33410.377972602844,84076,4.043771043771044
+cda-server-2,False,1802.4472270011902,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.018896961584687233, 'cur_kl_coeff': 1.2814452648162842, 'policy_loss': -0.029149293899536133, 'vf_explained_var': 0.9966424107551575, 'entropy': 1.5248656272888184, 'total_loss': 0.004489346407353878, 'kl': 0.011503946036100388}, 'sample_time_ms': 46887.154, 'num_steps_sampled': 981600, 'grad_time_ms': 375.555, 'num_steps_trained': 981600, 'load_time_ms': 0.692, 'update_time_ms': 2.625}",818,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 
'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.524507999420166,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,45600,981600,{},38,300,4.000074564187001,2025-09-05_01-48-15,5.025365209335014,3651949,1757029695,4.003906200630369,33457.902480602264,84376,4.0
+cda-server-2,False,1849.8041031360626,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.6453438997268677, 'cur_kl_coeff': 1.2814452648162842, 'policy_loss': -0.04249809309840202, 'vf_explained_var': 0.9167090654373169, 'entropy': 1.6513155698776245, 'total_loss': 0.6087195873260498, 'kl': 0.00458371639251709}, 'sample_time_ms': 46877.909, 'num_steps_sampled': 982800, 'grad_time_ms': 372.472, 'num_steps_trained': 982800, 'load_time_ms': 0.69, 'update_time_ms': 2.641}",819,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.35687613487244,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,46800,982800,{},39,298,-10.90554380198309,2025-09-05_01-49-02,8.000000650903239,3651949,1757029742,3.9313736255395266,33505.25935673714,84674,4.043624161073826
+cda-server-2,False,1898.446433544159,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.06971941888332367, 'cur_kl_coeff': 0.6407226324081421, 'policy_loss': -0.03713615611195564, 'vf_explained_var': 0.9876842498779297, 'entropy': 1.5876364707946777, 'total_loss': 0.04077058285474777, 'kl': 0.012778243981301785}, 'sample_time_ms': 47047.778, 'num_steps_sampled': 984000, 'grad_time_ms': 370.778, 'num_steps_trained': 984000, 'load_time_ms': 0.692, 'update_time_ms': 2.63}",820,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",48.64233040809631,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,48000,984000,{},40,300,4.000073832819711,2025-09-05_01-49-51,7.0225889626646305,3651949,1757029791,4.010305626357008,33553.90168714523,84974,3.9966666666666666
+cda-server-2,False,1945.9118869304657,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.45672476291656494, 'cur_kl_coeff': 0.6407226324081421, 'policy_loss': -0.039986565709114075, 'vf_explained_var': 0.9329177141189575, 'entropy': 1.629403829574585, 'total_loss': 0.4218178987503052, 'kl': 0.007928045466542244}, 'sample_time_ms': 47012.644, 'num_steps_sampled': 985200, 'grad_time_ms': 373.683, 'num_steps_trained': 985200, 'load_time_ms': 0.692, 'update_time_ms': 2.524}",821,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.46545338630676,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,49200,985200,{},41,296,-6.565736108482088,2025-09-05_01-50-38,4.008295094160271,3651949,1757029838,3.933905593295191,33601.36714053154,85270,4.043918918918919
+cda-server-2,False,1993.0809333324432,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.4314078986644745, 'cur_kl_coeff': 0.6407226324081421, 'policy_loss': -0.04592595249414444, 'vf_explained_var': 0.9293683171272278, 'entropy': 1.659238338470459, 'total_loss': 0.39231163263320923, 'kl': 0.010659330524504185}, 'sample_time_ms': 47026.958, 'num_steps_sampled': 986400, 'grad_time_ms': 374.914, 'num_steps_trained': 986400, 'load_time_ms': 0.686, 'update_time_ms': 2.545}",822,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.16904640197754,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,50400,986400,{},42,297,-5.6757054842679615,2025-09-05_01-51-26,5.235548155599991,3651949,1757029886,3.9428765355605506,33648.53618693352,85567,4.040404040404041
+cda-server-2,False,2039.9213824272156,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.5089528560638428, 'cur_kl_coeff': 0.6407226324081421, 'policy_loss': -0.03401399031281471, 'vf_explained_var': 0.922917366027832, 'entropy': 1.7040413618087769, 'total_loss': 0.49460160732269287, 'kl': 0.030688460916280746}, 'sample_time_ms': 46997.095, 'num_steps_sampled': 987600, 'grad_time_ms': 374.821, 'num_steps_trained': 987600, 'load_time_ms': 0.685, 'update_time_ms': 2.525}",823,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.84044909477234,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,51600,987600,{},43,295,-6.6121411046581,2025-09-05_01-52-12,4.108682707183751,3651949,1757029932,3.9076012637444864,33695.37663602829,85862,4.064406779661017
+cda-server-2,False,2087.2078564167023,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 1.1404789686203003, 'cur_kl_coeff': 0.9610840678215027, 'policy_loss': -0.039709579199552536, 'vf_explained_var': 0.845872163772583, 'entropy': 1.638351559638977, 'total_loss': 1.1233168840408325, 'kl': 0.023460354655981064}, 'sample_time_ms': 46991.99, 'num_steps_sampled': 988800, 'grad_time_ms': 374.628, 'num_steps_trained': 988800, 'load_time_ms': 0.677, 'update_time_ms': 2.581}",824,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.286473989486694,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,52800,988800,{},44,294,-8.143959451174759,2025-09-05_01-53-00,4.00212314965405,3651949,1757029980,3.848402377541737,33742.66311001778,86156,4.095238095238095
+cda-server-2,False,2134.2769277095795,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 1.7005854845046997, 'cur_kl_coeff': 1.441625952720642, 'policy_loss': -0.03192416951060295, 'vf_explained_var': 0.8220586776733398, 'entropy': 1.6427675485610962, 'total_loss': 1.6911768913269043, 'kl': 0.015618092380464077}, 'sample_time_ms': 47026.055, 'num_steps_sampled': 990000, 'grad_time_ms': 372.013, 'num_steps_trained': 990000, 'load_time_ms': 0.672, 'update_time_ms': 2.582}",825,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.0690712928772,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,54000,990000,{},45,295,-15.536150481328825,2025-09-05_01-53-47,4.807690528253136,3651949,1757030027,3.919624825657352,33789.732181310654,86451,4.054237288135593
+cda-server-2,False,2182.042104244232,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.38604265451431274, 'cur_kl_coeff': 1.441625952720642, 'policy_loss': -0.04679034650325775, 'vf_explained_var': 0.939765214920044, 'entropy': 1.768547773361206, 'total_loss': 0.35393983125686646, 'kl': 0.010188158601522446}, 'sample_time_ms': 47120.443, 'num_steps_sampled': 991200, 'grad_time_ms': 370.431, 'num_steps_trained': 991200, 'load_time_ms': 0.668, 'update_time_ms': 2.597}",826,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.76517653465271,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,55200,991200,{},46,297,-6.033127112083864,2025-09-05_01-54-35,4.005169908150885,3651949,1757030075,3.9243020513043256,33837.49735784531,86748,4.05050505050505
+cda-server-2,False,2229.09677529335,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.635163426399231, 'cur_kl_coeff': 1.441625952720642, 'policy_loss': -0.03038494847714901, 'vf_explained_var': 0.9094793796539307, 'entropy': 1.6650561094284058, 'total_loss': 0.6262122392654419, 'kl': 0.01486778724938631}, 'sample_time_ms': 47039.827, 'num_steps_sampled': 992400, 'grad_time_ms': 368.081, 'num_steps_trained': 992400, 'load_time_ms': 0.663, 'update_time_ms': 2.586}",827,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 
'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.05467104911804,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,56400,992400,{},47,296,-7.024247210587671,2025-09-05_01-55-22,4.0011468467636035,3651949,1757030122,3.920057117680865,33884.552028894424,87044,4.050675675675675
+cda-server-2,False,2276.7545762062073,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.6999471187591553, 'cur_kl_coeff': 1.441625952720642, 'policy_loss': -0.030680673196911812, 'vf_explained_var': 0.8954805731773376, 'entropy': 1.6117959022521973, 'total_loss': 0.6958112716674805, 'kl': 0.01841317117214203}, 'sample_time_ms': 47051.993, 'num_steps_sampled': 993600, 'grad_time_ms': 369.248, 'num_steps_trained': 993600, 'load_time_ms': 0.658, 'update_time_ms': 2.567}",828,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.657800912857056,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,57600,993600,{},48,297,-8.890578178095616,2025-09-05_01-56-09,4.00169898411644,3651949,1757030169,3.943147987212985,33932.20982980728,87341,4.037037037037037
+cda-server-2,False,2323.897565126419,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.77970290184021, 'cur_kl_coeff': 1.441625952720642, 'policy_loss': -0.03267605975270271, 'vf_explained_var': 0.8793459534645081, 'entropy': 1.5757498741149902, 'total_loss': 0.7723354697227478, 'kl': 0.017555641010403633}, 'sample_time_ms': 47029.896, 'num_steps_sampled': 994800, 'grad_time_ms': 369.836, 'num_steps_trained': 994800, 'load_time_ms': 0.663, 'update_time_ms': 2.566}",829,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 
'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.14298892021179,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,58800,994800,{},49,297,-8.222418084432903,2025-09-05_01-56-57,4.001790133079298,3651949,1757030217,3.932019531520677,33979.35281872749,87638,4.043771043771044
+cda-server-2,False,2370.9230823516846,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.9294682741165161, 'cur_kl_coeff': 1.441625952720642, 'policy_loss': -0.032422881573438644, 'vf_explained_var': 0.865056574344635, 'entropy': 1.6917376518249512, 'total_loss': 0.9142765402793884, 'kl': 0.011952572502195835}, 'sample_time_ms': 46868.697, 'num_steps_sampled': 996000, 'grad_time_ms': 369.332, 'num_steps_trained': 996000, 'load_time_ms': 0.663, 'update_time_ms': 2.569}",830,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.0255172252655,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,60000,996000,{},50,296,-9.300588835898967,2025-09-05_01-57-44,5.1430368160958,3651949,1757030264,3.931883483272182,34026.37833595276,87934,4.047297297297297
+cda-server-2,False,2417.963764667511,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 1.0987236499786377, 'cur_kl_coeff': 1.441625952720642, 'policy_loss': -0.04377513751387596, 'vf_explained_var': 0.8681024312973022, 'entropy': 1.7562556266784668, 'total_loss': 1.060706615447998, 'kl': 0.0039943247102200985}, 'sample_time_ms': 46827.398, 'num_steps_sampled': 997200, 'grad_time_ms': 368.127, 'num_steps_trained': 997200, 'load_time_ms': 0.665, 'update_time_ms': 2.606}",831,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.040682315826416,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,61200,997200,{},51,297,-12.745162180962872,2025-09-05_01-58-31,7.02983745904732,3651949,1757030311,3.939681231334518,34073.419018268585,88231,4.05050505050505
+cda-server-2,False,2465.346343755722,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.13873285055160522, 'cur_kl_coeff': 0.720812976360321, 'policy_loss': -0.0576799176633358, 'vf_explained_var': 0.9778363108634949, 'entropy': 1.6161856651306152, 'total_loss': 0.09548873454332352, 'kl': 0.020027123391628265}, 'sample_time_ms': 46850.782, 'num_steps_sampled': 998400, 'grad_time_ms': 366.186, 'num_steps_trained': 998400, 'load_time_ms': 0.666, 'update_time_ms': 2.606}",832,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.38257908821106,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,62400,998400,{},52,299,-2.147231516732976,2025-09-05_01-59-18,4.095067117194328,3651949,1757030358,3.9799889385349365,34120.801597356796,88530,4.013377926421405
+cda-server-2,False,2512.917120218277,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.2457091212272644, 'cur_kl_coeff': 1.0812194347381592, 'policy_loss': -0.02650185115635395, 'vf_explained_var': 0.9576071500778198, 'entropy': 1.5629717111587524, 'total_loss': 0.2660810649394989, 'kl': 0.04335271939635277}, 'sample_time_ms': 46923.492, 'num_steps_sampled': 999600, 'grad_time_ms': 366.587, 'num_steps_trained': 999600, 'load_time_ms': 0.667, 'update_time_ms': 2.581}",833,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.57077646255493,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,63600,999600,{},53,299,-4.067028611526574,2025-09-05_02-00-06,4.002652899814295,3651949,1757030406,3.965485489524469,34168.37237381935,88829,4.023411371237458
+cda-server-2,False,2560.136365890503,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.1777401566505432, 'cur_kl_coeff': 1.6218292713165283, 'policy_loss': -0.03697414696216583, 'vf_explained_var': 0.9705994129180908, 'entropy': 1.5341241359710693, 'total_loss': 0.1595187485218048, 'kl': 0.011562712490558624}, 'sample_time_ms': 46917.701, 'num_steps_sampled': 1000800, 'grad_time_ms': 365.683, 'num_steps_trained': 1000800, 'load_time_ms': 0.666, 'update_time_ms': 2.508}",834,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.21924567222595,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,64800,1000800,{},54,296,-3.8470397589171252,2025-09-05_02-00-53,4.0097646123115815,3651949,1757030453,3.950852984420732,34215.59161949158,89125,4.033783783783784
+cda-server-2,False,2607.7824144363403,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.0038141896948218346, 'cur_kl_coeff': 1.6218292713165283, 'policy_loss': -0.10900921374559402, 'vf_explained_var': 0.9992968440055847, 'entropy': 1.586694359779358, 'total_loss': -0.05552603676915169, 'kl': 0.030625291168689728}, 'sample_time_ms': 46973.064, 'num_steps_sampled': 1002000, 'grad_time_ms': 368.008, 'num_steps_trained': 1002000, 'load_time_ms': 0.673, 'update_time_ms': 2.525}",835,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 
'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.6460485458374,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,66000,1002000,{},55,300,4.000079768933953,2025-09-05_02-01-41,4.001702279138652,3651949,1757030501,4.0002246964451595,34263.237668037415,89425,4.0
+cda-server-2,False,2654.4209916591644,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 2.109739303588867, 'cur_kl_coeff': 2.432743787765503, 'policy_loss': -0.04223893955349922, 'vf_explained_var': 0.7424198389053345, 'entropy': 1.7431635856628418, 'total_loss': 2.084862470626831, 'kl': 0.007136723026633263}, 'sample_time_ms': 46857.348, 'num_steps_sampled': 1003200, 'grad_time_ms': 371.02, 'num_steps_trained': 1003200, 'load_time_ms': 0.69, 'update_time_ms': 2.523}",836,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 
'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.6385772228241,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,67200,1003200,{},56,291,-6.951200583934202,2025-09-05_02-02-27,4.0024479785942555,3651949,1757030547,3.808239847125168,34309.87624526024,89716,4.123711340206185
+cda-server-2,False,2702.0383739471436,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 6.08589506149292, 'cur_kl_coeff': 2.432743787765503, 'policy_loss': -0.059231605380773544, 'vf_explained_var': 0.5848007798194885, 'entropy': 1.8705410957336426, 'total_loss': 6.044958591461182, 'kl': 0.007520413026213646}, 'sample_time_ms': 46912.26, 'num_steps_sampled': 1004400, 'grad_time_ms': 372.301, 'num_steps_trained': 1004400, 'load_time_ms': 0.688, 'update_time_ms': 2.568}",837,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.617382287979126,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,68400,1004400,{},57,287,-21.969267051290664,2025-09-05_02-03-15,4.002718773356998,3651949,1757030595,3.695349864450316,34357.49362754822,90003,4.195121951219512
+cda-server-2,False,2748.893737077713,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 2.9250335693359375, 'cur_kl_coeff': 2.432743787765503, 'policy_loss': -0.06444211304187775, 'vf_explained_var': 0.6794325709342957, 'entropy': 1.9101426601409912, 'total_loss': 2.895181179046631, 'kl': 0.014218462631106377}, 'sample_time_ms': 46833.444, 'num_steps_sampled': 1005600, 'grad_time_ms': 370.834, 'num_steps_trained': 1005600, 'load_time_ms': 0.686, 'update_time_ms': 2.566}",838,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.85536313056946,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,69600,1005600,{},58,286,-13.15736932629656,2025-09-05_02-04-02,4.001995336854362,3651949,1757030642,3.7075084388967254,34404.34899067879,90289,4.185314685314685
+cda-server-2,False,2796.1250982284546,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.7307040691375732, 'cur_kl_coeff': 2.432743787765503, 'policy_loss': -0.04437698423862457, 'vf_explained_var': 0.8905224204063416, 'entropy': 1.7138915061950684, 'total_loss': 0.7109454274177551, 'kl': 0.010119595564901829}, 'sample_time_ms': 46839.632, 'num_steps_sampled': 1006800, 'grad_time_ms': 373.49, 'num_steps_trained': 1006800, 'load_time_ms': 0.694, 'update_time_ms': 2.594}",839,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.23136115074158,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,70800,1006800,{},59,295,-7.067287220283557,2025-09-05_02-04-49,4.0160442478173355,3651949,1757030689,3.90216877290391,34451.58035182953,90584,4.064406779661017
+cda-server-2,False,2843.541459083557,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.6730295419692993, 'cur_kl_coeff': 2.432743787765503, 'policy_loss': -0.051897481083869934, 'vf_explained_var': 0.8944464921951294, 'entropy': 1.6187870502471924, 'total_loss': 0.6266192197799683, 'kl': 0.0022555519826710224}, 'sample_time_ms': 46877.723, 'num_steps_sampled': 1008000, 'grad_time_ms': 374.493, 'num_steps_trained': 1008000, 'load_time_ms': 0.69, 'update_time_ms': 2.616}",840,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.41636085510254,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,72000,1008000,{},60,297,-6.623467773245288,2025-09-05_02-05-36,5.028262547214235,3651949,1757030736,3.932894433019231,34498.99671268463,90881,4.047138047138047
+cda-server-2,False,2892.0355756282806,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.014225517399609089, 'cur_kl_coeff': 1.2163718938827515, 'policy_loss': -0.09503839910030365, 'vf_explained_var': 0.9973185062408447, 'entropy': 1.5763732194900513, 'total_loss': -0.05183200538158417, 'kl': 0.023825662210583687}, 'sample_time_ms': 47023.204, 'num_steps_sampled': 1009200, 'grad_time_ms': 374.407, 'num_steps_trained': 1009200, 'load_time_ms': 0.679, 'update_time_ms': 2.592}",841,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 
'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",48.49411654472351,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,73200,1009200,{},61,300,4.000054628599927,2025-09-05_02-06-25,4.005483043406404,3651949,1757030785,4.00024954384086,34547.490829229355,91181,4.0
+cda-server-2,False,2939.398644924164,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.783150851726532, 'cur_kl_coeff': 1.8245577812194824, 'policy_loss': -0.032420676201581955, 'vf_explained_var': 0.8862425684928894, 'entropy': 1.589916706085205, 'total_loss': 0.7971222996711731, 'kl': 0.02542654052376747}, 'sample_time_ms': 47019.846, 'num_steps_sampled': 1010400, 'grad_time_ms': 375.712, 'num_steps_trained': 1010400, 'load_time_ms': 0.683, 'update_time_ms': 2.614}",842,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.36306929588318,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,74400,1010400,{},62,299,-6.313502860682476,2025-09-05_02-07-12,7.0249180039085815,3651949,1757030832,3.9758396316724185,34594.85389852524,91480,4.0200668896321075
+cda-server-2,False,2986.665239095688,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.34120887517929077, 'cur_kl_coeff': 2.7368369102478027, 'policy_loss': -0.048503000289201736, 'vf_explained_var': 0.9429805278778076, 'entropy': 1.6626811027526855, 'total_loss': 0.3047863841056824, 'kl': 0.004414035473018885}, 'sample_time_ms': 46989.827, 'num_steps_sampled': 1011600, 'grad_time_ms': 375.314, 'num_steps_trained': 1011600, 'load_time_ms': 0.675, 'update_time_ms': 2.645}",843,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.26659417152405,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,75600,1011600,{},63,296,-7.844585469642691,2025-09-05_02-08-00,4.103040915534221,3651949,1757030880,3.926104031881041,34642.12049269676,91776,4.047297297297297
+cda-server-2,False,3033.6946427822113,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 1.7640461921691895, 'cur_kl_coeff': 1.3684184551239014, 'policy_loss': -0.048718929290771484, 'vf_explained_var': 0.7972148060798645, 'entropy': 1.7225337028503418, 'total_loss': 1.7359509468078613, 'kl': 0.015071181580424309}, 'sample_time_ms': 46971.756, 'num_steps_sampled': 1012800, 'grad_time_ms': 374.371, 'num_steps_trained': 1012800, 'load_time_ms': 0.676, 'update_time_ms': 2.658}",844,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.02940368652344,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,76800,1012800,{},64,293,-10.947529744655213,2025-09-05_02-08-47,4.003919068362274,3651949,1757030927,3.855561278102881,34689.149896383286,92069,4.09556313993174
+cda-server-2,False,3081.054685115814,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.11681367456912994, 'cur_kl_coeff': 1.3684184551239014, 'policy_loss': -0.054528847336769104, 'vf_explained_var': 0.9810612797737122, 'entropy': 1.6391432285308838, 'total_loss': 0.08083418011665344, 'kl': 0.013555314391851425}, 'sample_time_ms': 46943.11, 'num_steps_sampled': 1014000, 'grad_time_ms': 374.353, 'num_steps_trained': 1014000, 'load_time_ms': 0.692, 'update_time_ms': 2.664}",845,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.360042333602905,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,78000,1014000,{},65,298,-3.8104298060247075,2025-09-05_02-09-34,4.0021551713484484,3651949,1757030974,3.974042861101505,34736.50993871689,92367,4.02013422818792
+cda-server-2,False,3128.471792936325,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 6.564021587371826, 'cur_kl_coeff': 1.3684184551239014, 'policy_loss': -0.05545325577259064, 'vf_explained_var': 0.6222854256629944, 'entropy': 1.926668643951416, 'total_loss': 6.520650863647461, 'kl': 0.0088294493034482}, 'sample_time_ms': 47023.704, 'num_steps_sampled': 1015200, 'grad_time_ms': 371.66, 'num_steps_trained': 1015200, 'load_time_ms': 0.673, 'update_time_ms': 2.689}",846,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 
'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.417107820510864,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,79200,1015200,{},66,286,-22.18296229789366,2025-09-05_02-10-22,4.0009488497783146,3651949,1757031022,3.7816032490069897,34783.9270465374,92653,4.143356643356643
+cda-server-2,False,3175.341913461685,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.9627919793128967, 'cur_kl_coeff': 1.3684184551239014, 'policy_loss': -0.06439124047756195, 'vf_explained_var': 0.8632643818855286, 'entropy': 1.6867482662200928, 'total_loss': 0.9128870368003845, 'kl': 0.010586160235106945}, 'sample_time_ms': 46949.055, 'num_steps_sampled': 1016400, 'grad_time_ms': 371.604, 'num_steps_trained': 1016400, 'load_time_ms': 0.667, 'update_time_ms': 2.68}",847,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.87012052536011,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,80400,1016400,{},67,296,-22.6179959140363,2025-09-05_02-11-08,4.002575296955297,3651949,1757031068,3.831446230988727,34830.79716706276,92949,4.10472972972973
+cda-server-2,False,3222.875273704529,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 1.5744482278823853, 'cur_kl_coeff': 1.3684184551239014, 'policy_loss': -0.04186302050948143, 'vf_explained_var': 0.805033266544342, 'entropy': 1.647596836090088, 'total_loss': 1.5444416999816895, 'kl': 0.008664320223033428}, 'sample_time_ms': 47015.608, 'num_steps_sampled': 1017600, 'grad_time_ms': 372.874, 'num_steps_trained': 1017600, 'load_time_ms': 0.667, 'update_time_ms': 2.704}",848,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.53336024284363,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,81600,1017600,{},68,295,-13.01126053137859,2025-09-05_02-11-56,4.002531271831623,3651949,1757031116,3.8875773331045975,34878.3305273056,93244,4.074576271186441
+cda-server-2,False,3270.9713361263275,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.5337961316108704, 'cur_kl_coeff': 1.3684184551239014, 'policy_loss': -0.03373105078935623, 'vf_explained_var': 0.9197341203689575, 'entropy': 1.52744722366333, 'total_loss': 0.5145151019096375, 'kl': 0.010559634305536747}, 'sample_time_ms': 47105.408, 'num_steps_sampled': 1018800, 'grad_time_ms': 369.658, 'num_steps_trained': 1018800, 'load_time_ms': 0.652, 'update_time_ms': 2.675}",849,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",48.096062421798706,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,82800,1018800,{},69,297,-6.030843978558636,2025-09-05_02-12-44,4.0022888603229845,3651949,1757031164,3.9326035464886924,34926.4265897274,93541,4.040404040404041
+cda-server-2,False,3318.1038093566895,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.17231231927871704, 'cur_kl_coeff': 1.3684184551239014, 'policy_loss': -0.040909189730882645, 'vf_explained_var': 0.969477117061615, 'entropy': 1.5548286437988281, 'total_loss': 0.15273785591125488, 'kl': 0.01559081207960844}, 'sample_time_ms': 47077.918, 'num_steps_sampled': 1020000, 'grad_time_ms': 368.771, 'num_steps_trained': 1020000, 'load_time_ms': 0.649, 'update_time_ms': 2.641}",850,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.13247323036194,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,84000,1020000,{},70,299,-2.0964218592211097,2025-09-05_02-13-31,4.100479878668449,3651949,1757031211,3.9801735274010057,34973.559062957764,93840,4.013377926421405
+cda-server-2,False,3365.2187576293945,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 3.737804889678955, 'cur_kl_coeff': 1.3684184551239014, 'policy_loss': -0.04222143813967705, 'vf_explained_var': 0.7075552344322205, 'entropy': 1.907225251197815, 'total_loss': 3.7077810764312744, 'kl': 0.008913558907806873}, 'sample_time_ms': 46941.779, 'num_steps_sampled': 1021200, 'grad_time_ms': 366.88, 'num_steps_trained': 1021200, 'load_time_ms': 0.652, 'update_time_ms': 2.777}",851,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.11494827270508,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,85200,1021200,{},71,292,-20.45980288708402,2025-09-05_02-14-18,4.003708898444644,3651949,1757031258,3.832519360572173,35020.67401123047,94132,4.109589041095891
+cda-server-2,False,3412.5559413433075,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.5083017945289612, 'cur_kl_coeff': 1.3684184551239014, 'policy_loss': -0.04789041355252266, 'vf_explained_var': 0.9286045432090759, 'entropy': 1.7039881944656372, 'total_loss': 0.4770981967449188, 'kl': 0.012194222770631313}, 'sample_time_ms': 46938.688, 'num_steps_sampled': 1022400, 'grad_time_ms': 367.508, 'num_steps_trained': 1022400, 'load_time_ms': 0.65, 'update_time_ms': 2.724}",852,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.337183713912964,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,86400,1022400,{},72,298,-10.305195964145561,2025-09-05_02-15-06,4.001530080673005,3651949,1757031306,3.9386926428444164,35068.01119494438,94430,4.040268456375839
+cda-server-2,False,3461.169378042221,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.9372121095657349, 'cur_kl_coeff': 1.3684184551239014, 'policy_loss': -0.04090845212340355, 'vf_explained_var': 0.8610450029373169, 'entropy': 1.8454023599624634, 'total_loss': 0.9138251543045044, 'kl': 0.012804157100617886}, 'sample_time_ms': 47072.679, 'num_steps_sampled': 1023600, 'grad_time_ms': 368.146, 'num_steps_trained': 1023600, 'load_time_ms': 0.672, 'update_time_ms': 2.734}",853,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",48.613436698913574,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,87600,1023600,{},73,294,-6.752795301199743,2025-09-05_02-15-54,4.001648529056688,3651949,1757031354,3.8833382461296617,35116.624631643295,94724,4.074829931972789
+cda-server-2,False,3508.2650122642517,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.9866107106208801, 'cur_kl_coeff': 1.3684184551239014, 'policy_loss': -0.03655135631561279, 'vf_explained_var': 0.8712561726570129, 'entropy': 1.7019932270050049, 'total_loss': 0.9718363881111145, 'kl': 0.01591402105987072}, 'sample_time_ms': 47079.812, 'num_steps_sampled': 1024800, 'grad_time_ms': 367.695, 'num_steps_trained': 1024800, 'load_time_ms': 0.672, 'update_time_ms': 2.773}",854,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.09563422203064,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,88800,1024800,{},74,297,-10.947154128957422,2025-09-05_02-16-42,4.155761849921854,3651949,1757031402,3.937459962800192,35163.720265865326,95021,4.043771043771044
+cda-server-2,False,3555.659045934677,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.29008471965789795, 'cur_kl_coeff': 1.3684184551239014, 'policy_loss': -0.04245949909090996, 'vf_explained_var': 0.9557339549064636, 'entropy': 1.6644426584243774, 'total_loss': 0.25923487544059753, 'kl': 0.008484016172587872}, 'sample_time_ms': 47084.179, 'num_steps_sampled': 1026000, 'grad_time_ms': 366.68, 'num_steps_trained': 1026000, 'load_time_ms': 0.655, 'update_time_ms': 2.755}",855,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.394033670425415,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,90000,1026000,{},75,297,-3.0937475399149967,2025-09-05_02-17-29,4.111535597011368,3651949,1757031449,3.9604825164549546,35211.11429953575,95318,4.026936026936027
+cda-server-2,False,3602.757490158081,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.42554929852485657, 'cur_kl_coeff': 1.3684184551239014, 'policy_loss': -0.03615068271756172, 'vf_explained_var': 0.9381406307220459, 'entropy': 1.572040319442749, 'total_loss': 0.3967033922672272, 'kl': 0.005338112823665142}, 'sample_time_ms': 47050.886, 'num_steps_sampled': 1027200, 'grad_time_ms': 368.127, 'num_steps_trained': 1027200, 'load_time_ms': 0.653, 'update_time_ms': 2.747}",856,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.09844422340393,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,91200,1027200,{},76,298,-9.032099651438411,2025-09-05_02-18-16,4.002385814516934,3651949,1757031496,3.9564894929866226,35258.212743759155,95616,4.030201342281879
+cda-server-2,False,3649.9481089115143,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 1.4891831874847412, 'cur_kl_coeff': 1.3684184551239014, 'policy_loss': -0.046019166707992554, 'vf_explained_var': 0.8592383861541748, 'entropy': 1.877967357635498, 'total_loss': 1.4562660455703735, 'kl': 0.009574709460139275}, 'sample_time_ms': 47084.627, 'num_steps_sampled': 1028400, 'grad_time_ms': 366.468, 'num_steps_trained': 1028400, 'load_time_ms': 0.653, 'update_time_ms': 2.744}",857,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.19061875343323,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,92400,1028400,{},77,293,-18.206337076477084,2025-09-05_02-19-03,4.0019244035208015,3651949,1757031543,3.85261970829871,35305.40336251259,95909,4.098976109215017
+cda-server-2,False,3698.411008119583,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.951488196849823, 'cur_kl_coeff': 1.3684184551239014, 'policy_loss': -0.03943841904401779, 'vf_explained_var': 0.8684049248695374, 'entropy': 1.7809098958969116, 'total_loss': 0.9446311593055725, 'kl': 0.023809516802430153}, 'sample_time_ms': 47178.333, 'num_steps_sampled': 1029600, 'grad_time_ms': 365.734, 'num_steps_trained': 1029600, 'load_time_ms': 0.659, 'update_time_ms': 2.72}",858,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",48.46289920806885,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,93600,1029600,{},78,296,-8.768533616429245,2025-09-05_02-19-52,6.0080410600827285,3651949,1757031592,3.929065338838763,35353.86626172066,96205,4.050675675675675
+cda-server-2,False,3745.399493455887,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.15092967450618744, 'cur_kl_coeff': 2.0526275634765625, 'policy_loss': -0.04178021848201752, 'vf_explained_var': 0.97370445728302, 'entropy': 1.6728134155273438, 'total_loss': 0.1301470547914505, 'kl': 0.010229609906673431}, 'sample_time_ms': 47066.693, 'num_steps_sampled': 1030800, 'grad_time_ms': 366.571, 'num_steps_trained': 1030800, 'load_time_ms': 0.664, 'update_time_ms': 2.733}",859,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.98848533630371,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,94800,1030800,{},79,299,-2.063900500181365,2025-09-05_02-20-39,4.001913774707609,3651949,1757031639,3.9799500736541122,35400.85474705696,96504,4.013377926421405
+cda-server-2,False,3792.646115064621,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.08895605057477951, 'cur_kl_coeff': 2.0526275634765625, 'policy_loss': -0.03882890194654465, 'vf_explained_var': 0.9836444854736328, 'entropy': 1.5997546911239624, 'total_loss': 0.06255945563316345, 'kl': 0.006056779995560646}, 'sample_time_ms': 47076.069, 'num_steps_sampled': 1032000, 'grad_time_ms': 368.547, 'num_steps_trained': 1032000, 'load_time_ms': 0.668, 'update_time_ms': 2.726}",860,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.24662160873413,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,96000,1032000,{},80,300,-0.029156556361344954,2025-09-05_02-21-26,4.000943291599288,3651949,1757031686,3.9867878021114804,35448.101368665695,96804,4.01
+cda-server-2,False,3839.9683599472046,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.00448631402105093, 'cur_kl_coeff': 2.0526275634765625, 'policy_loss': -0.04847244918346405, 'vf_explained_var': 0.9991893768310547, 'entropy': 1.626355767250061, 'total_loss': -0.004763439297676086, 'kl': 0.019108539447188377}, 'sample_time_ms': 47093.837, 'num_steps_sampled': 1033200, 'grad_time_ms': 371.567, 'num_steps_trained': 1033200, 'load_time_ms': 0.676, 'update_time_ms': 2.601}",861,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 
'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.32224488258362,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,97200,1033200,{},81,300,4.000071485714502,2025-09-05_02-22-13,4.004060045472517,3651949,1757031733,4.000229414012279,35495.42361354828,97104,4.0
+cda-server-2,False,3887.524997472763,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.22216840088367462, 'cur_kl_coeff': 2.0526275634765625, 'policy_loss': -0.030925869941711426, 'vf_explained_var': 0.961039662361145, 'entropy': 1.690726399421692, 'total_loss': 0.19594722986221313, 'kl': 0.0022920460905879736}, 'sample_time_ms': 47117.983, 'num_steps_sampled': 1034400, 'grad_time_ms': 369.372, 'num_steps_trained': 1034400, 'load_time_ms': 0.673, 'update_time_ms': 2.634}",862,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.55663752555847,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,98400,1034400,{},82,298,-2.9009658105735845,2025-09-05_02-23-01,4.153891133343445,3651949,1757031781,3.9575125160733085,35542.98025107384,97402,4.030201342281879
+cda-server-2,False,3934.716385126114,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 1.4149174690246582, 'cur_kl_coeff': 1.0263137817382812, 'policy_loss': -0.028615090996026993, 'vf_explained_var': 0.8368207812309265, 'entropy': 1.80344557762146, 'total_loss': 1.3911685943603516, 'kl': 0.0047414242289960384}, 'sample_time_ms': 46975.155, 'num_steps_sampled': 1035600, 'grad_time_ms': 369.995, 'num_steps_trained': 1035600, 'load_time_ms': 0.661, 'update_time_ms': 2.587}",863,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.19138765335083,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,99600,1035600,{},83,297,-14.568411083173391,2025-09-05_02-23-48,4.0021343322353475,3651949,1757031828,3.9377038734228167,35590.17163872719,97699,4.040404040404041
+cda-server-2,False,3981.683268070221,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.028436502441763878, 'cur_kl_coeff': 0.5131568908691406, 'policy_loss': -0.06831005960702896, 'vf_explained_var': 0.9948043823242188, 'entropy': 1.5992634296417236, 'total_loss': -0.019522948190569878, 'kl': 0.03965768218040466}, 'sample_time_ms': 46959.895, 'num_steps_sampled': 1036800, 'grad_time_ms': 372.304, 'num_steps_trained': 1036800, 'load_time_ms': 0.664, 'update_time_ms': 2.571}",864,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 
'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.966882944107056,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,100800,1036800,{},84,299,2.5430514569442053,2025-09-05_02-24-35,4.097906543848747,3651949,1757031875,3.995679893304896,35637.138521671295,97998,4.003344481605351
+cda-server-2,False,4029.087103366852,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 1.0207265615463257, 'cur_kl_coeff': 0.7697353363037109, 'policy_loss': -0.03795013204216957, 'vf_explained_var': 0.8686723113059998, 'entropy': 1.7936816215515137, 'total_loss': 0.9861171245574951, 'kl': 0.004340069368481636}, 'sample_time_ms': 46959.544, 'num_steps_sampled': 1038000, 'grad_time_ms': 373.739, 'num_steps_trained': 1038000, 'load_time_ms': 0.657, 'update_time_ms': 2.569}",865,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.40383529663086,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,102000,1038000,{},85,296,-10.615523740509985,2025-09-05_02-25-23,4.001541045338795,3651949,1757031923,3.912718043270071,35684.542356967926,98294,4.0574324324324325
+cda-server-2,False,4076.687091112137,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 1.0759044885635376, 'cur_kl_coeff': 0.38486766815185547, 'policy_loss': -0.06365203857421875, 'vf_explained_var': 0.8513641953468323, 'entropy': 1.7185730934143066, 'total_loss': 1.0180267095565796, 'kl': 0.015002868138253689}, 'sample_time_ms': 47008.785, 'num_steps_sampled': 1039200, 'grad_time_ms': 374.66, 'num_steps_trained': 1039200, 'load_time_ms': 0.659, 'update_time_ms': 2.582}",866,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.599987745285034,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,103200,1039200,{},86,293,-8.442591539847172,2025-09-05_02-26-10,4.134358960548335,3651949,1757031970,3.8611631546510012,35732.14234471321,98587,4.088737201365188
+cda-server-2,False,4123.634033203125,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 2.012903928756714, 'cur_kl_coeff': 0.38486766815185547, 'policy_loss': -0.055759914219379425, 'vf_explained_var': 0.7706321477890015, 'entropy': 1.8083112239837646, 'total_loss': 1.9681257009506226, 'kl': 0.02853398770093918}, 'sample_time_ms': 46984.056, 'num_steps_sampled': 1040400, 'grad_time_ms': 375.05, 'num_steps_trained': 1040400, 'load_time_ms': 0.661, 'update_time_ms': 2.546}",867,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.94694209098816,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,104400,1040400,{},87,291,-8.633827227810752,2025-09-05_02-26-57,8.000034105696873,3651949,1757032017,3.811383140448338,35779.0892868042,98878,4.123711340206185
+cda-server-2,False,4170.876131296158,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 2.61270809173584, 'cur_kl_coeff': 0.5773015022277832, 'policy_loss': -0.05995117872953415, 'vf_explained_var': 0.7350280284881592, 'entropy': 1.8723901510238647, 'total_loss': 2.5706839561462402, 'kl': 0.031053271144628525}, 'sample_time_ms': 46863.587, 'num_steps_sampled': 1041600, 'grad_time_ms': 373.377, 'num_steps_trained': 1041600, 'load_time_ms': 0.659, 'update_time_ms': 2.593}",868,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.24209809303284,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,105600,1041600,{},88,288,-14.332236758290303,2025-09-05_02-27-44,4.0084490855803345,3651949,1757032064,3.7660020275860706,35826.33138489723,99166,4.15625
+cda-server-2,False,4218.02410697937,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 1.6040679216384888, 'cur_kl_coeff': 0.8659522533416748, 'policy_loss': -0.045723091810941696, 'vf_explained_var': 0.7927010655403137, 'entropy': 1.7627092599868774, 'total_loss': 1.5658777952194214, 'kl': 0.008699173107743263}, 'sample_time_ms': 46879.924, 'num_steps_sampled': 1042800, 'grad_time_ms': 372.932, 'num_steps_trained': 1042800, 'load_time_ms': 0.654, 'update_time_ms': 2.578}",869,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.14797568321228,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,106800,1042800,{},89,295,-10.237191955210637,2025-09-05_02-28-32,4.344108000962315,3651949,1757032112,3.8888261688516033,35873.479360580444,99461,4.071186440677966
+cda-server-2,False,4264.8796372413635,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 3.9907429218292236, 'cur_kl_coeff': 0.8659522533416748, 'policy_loss': -0.03930972144007683, 'vf_explained_var': 0.6695454716682434, 'entropy': 1.773967981338501, 'total_loss': 3.9705400466918945, 'kl': 0.022063931450247765}, 'sample_time_ms': 46839.657, 'num_steps_sampled': 1044000, 'grad_time_ms': 374.063, 'num_steps_trained': 1044000, 'load_time_ms': 0.665, 'update_time_ms': 2.636}",870,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.85553026199341,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,108000,1044000,{},90,292,-20.969019081543124,2025-09-05_02-29-18,4.000975294547331,3651949,1757032158,3.8130491590673063,35920.33489084244,99753,4.11986301369863
+cda-server-2,False,4311.608902692795,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 2.4754321575164795, 'cur_kl_coeff': 1.2989283800125122, 'policy_loss': -0.050115033984184265, 'vf_explained_var': 0.7231003642082214, 'entropy': 1.8573485612869263, 'total_loss': 2.438021183013916, 'kl': 0.009780575521290302}, 'sample_time_ms': 46782.984, 'num_steps_sampled': 1045200, 'grad_time_ms': 371.452, 'num_steps_trained': 1045200, 'load_time_ms': 0.661, 'update_time_ms': 2.682}",871,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.729265451431274,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,109200,1045200,{},91,291,-10.805479711689856,2025-09-05_02-30-05,4.152770239447509,3651949,1757032205,3.8123680193373417,35967.06415629387,100044,4.1271477663230245
+cda-server-2,False,4358.617847442627,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 4.531902313232422, 'cur_kl_coeff': 1.2989283800125122, 'policy_loss': -0.04319247603416443, 'vf_explained_var': 0.6909646391868591, 'entropy': 1.690366268157959, 'total_loss': 4.495872497558594, 'kl': 0.005514280870556831}, 'sample_time_ms': 46727.646, 'num_steps_sampled': 1046400, 'grad_time_ms': 372.025, 'num_steps_trained': 1046400, 'load_time_ms': 0.657, 'update_time_ms': 2.662}",872,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.00894474983215,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,110400,1046400,{},92,292,-25.84163814989553,2025-09-05_02-30-52,8.00000040038638,3651949,1757032252,3.8490710389223346,36014.0731010437,100336,4.0993150684931505
+cda-server-2,False,4405.620505571365,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 2.6366374492645264, 'cur_kl_coeff': 1.2989283800125122, 'policy_loss': -0.05803981050848961, 'vf_explained_var': 0.6993056535720825, 'entropy': 1.8889535665512085, 'total_loss': 2.592402219772339, 'kl': 0.010627496987581253}, 'sample_time_ms': 46709.217, 'num_steps_sampled': 1047600, 'grad_time_ms': 371.614, 'num_steps_trained': 1047600, 'load_time_ms': 0.658, 'update_time_ms': 2.688}",873,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.0026581287384,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,111600,1047600,{},93,292,-14.402527847863233,2025-09-05_02-31-39,4.144746568904516,3651949,1757032299,3.8086088942115413,36061.07575917244,100628,4.126712328767123
+cda-server-2,False,4452.6467180252075,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 1.4375331401824951, 'cur_kl_coeff': 1.2989283800125122, 'policy_loss': -0.05303184688091278, 'vf_explained_var': 0.7942469120025635, 'entropy': 1.7751680612564087, 'total_loss': 1.4004120826721191, 'kl': 0.012249093502759933}, 'sample_time_ms': 46717.601, 'num_steps_sampled': 1048800, 'grad_time_ms': 369.234, 'num_steps_trained': 1048800, 'load_time_ms': 0.65, 'update_time_ms': 2.684}",874,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.02621245384216,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,112800,1048800,{},94,289,-6.070105589983218,2025-09-05_02-32-26,4.025424877601431,3651949,1757032346,3.8043935583327024,36108.10197162628,100917,4.131487889273356
+cda-server-2,False,4499.890250205994,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 2.1109554767608643, 'cur_kl_coeff': 1.2989283800125122, 'policy_loss': -0.04051174595952034, 'vf_explained_var': 0.7790870666503906, 'entropy': 1.6913646459579468, 'total_loss': 2.08211088180542, 'kl': 0.008982077240943909}, 'sample_time_ms': 46703.177, 'num_steps_sampled': 1050000, 'grad_time_ms': 367.624, 'num_steps_trained': 1050000, 'load_time_ms': 0.652, 'update_time_ms': 2.661}",875,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.24353218078613,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,114000,1050000,{},95,296,-17.520288488662782,2025-09-05_02-33-14,4.002623502880692,3651949,1757032394,3.885369529685228,36155.34550380707,101213,4.070945945945946
+cda-server-2,False,4549.380757570267,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.08191846311092377, 'cur_kl_coeff': 1.2989283800125122, 'policy_loss': -0.05402367189526558, 'vf_explained_var': 0.9858669638633728, 'entropy': 1.666379690170288, 'total_loss': 0.042791612446308136, 'kl': 0.011468542739748955}, 'sample_time_ms': 46894.061, 'num_steps_sampled': 1051200, 'grad_time_ms': 365.753, 'num_steps_trained': 1051200, 'load_time_ms': 0.664, 'update_time_ms': 2.657}",876,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",49.49050736427307,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,115200,1051200,{},96,298,-1.1716103910594278,2025-09-05_02-34-03,4.0014497598455065,3651949,1757032443,3.972802016711378,36204.83601117134,101511,4.02013422818792
+cda-server-2,False,4596.379050016403,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.010374766774475574, 'cur_kl_coeff': 1.2989283800125122, 'policy_loss': -0.07171420753002167, 'vf_explained_var': 0.9980847835540771, 'entropy': 1.6615816354751587, 'total_loss': -0.0277959443628788, 'kl': 0.02582397870719433}, 'sample_time_ms': 46898.966, 'num_steps_sampled': 1052400, 'grad_time_ms': 365.953, 'num_steps_trained': 1052400, 'load_time_ms': 0.663, 'update_time_ms': 2.664}",877,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.998292446136475,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,116400,1052400,{},97,300,4.000078448041956,2025-09-05_02-34-50,4.098817023428135,3651949,1757032490,4.000537573356013,36251.83430361748,101811,4.0
+cda-server-2,False,4643.824803829193,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 2.7474539279937744, 'cur_kl_coeff': 1.948392629623413, 'policy_loss': -0.04414926841855049, 'vf_explained_var': 0.7470448613166809, 'entropy': 1.794586181640625, 'total_loss': 2.712033271789551, 'kl': 0.004480044357478619}, 'sample_time_ms': 46916.579, 'num_steps_sampled': 1053600, 'grad_time_ms': 368.716, 'num_steps_trained': 1053600, 'load_time_ms': 0.663, 'update_time_ms': 2.619}",878,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.44575381278992,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,117600,1053600,{},98,291,-18.465596458805322,2025-09-05_02-35-38,4.740949061502659,3651949,1757032538,3.8188354037476553,36299.28005743027,102102,4.116838487972508
+cda-server-2,False,4690.8918998241425,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 4.36021614074707, 'cur_kl_coeff': 0.9741963148117065, 'policy_loss': -0.05083365738391876, 'vf_explained_var': 0.683496356010437, 'entropy': 1.7298600673675537, 'total_loss': 4.3148627281188965, 'kl': 0.005625119898468256}, 'sample_time_ms': 46907.361, 'num_steps_sampled': 1054800, 'grad_time_ms': 369.844, 'num_steps_trained': 1054800, 'load_time_ms': 0.672, 'update_time_ms': 2.638}",879,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.06709599494934,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,118800,1054800,{},99,292,-24.172852530160384,2025-09-05_02-36-25,4.001776322429072,3651949,1757032585,3.8113737426614356,36346.34715342522,102394,4.11986301369863
+cda-server-2,False,4737.693937063217,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 3.805392026901245, 'cur_kl_coeff': 0.9741963148117065, 'policy_loss': -0.04470294341444969, 'vf_explained_var': 0.6905941367149353, 'entropy': 1.8461089134216309, 'total_loss': 3.786196708679199, 'kl': 0.02618289738893509}, 'sample_time_ms': 46904.596, 'num_steps_sampled': 1056000, 'grad_time_ms': 367.309, 'num_steps_trained': 1056000, 'load_time_ms': 0.656, 'update_time_ms': 2.64}",880,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 
'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.80203723907471,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,120000,1056000,{},100,291,-22.90934195242167,2025-09-05_02-37-12,4.190513806122592,3651949,1757032632,3.8116798598960164,36393.14919066429,102685,4.123711340206185
+cda-server-2,False,4784.721884489059,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.28617578744888306, 'cur_kl_coeff': 1.461294412612915, 'policy_loss': -0.06089896708726883, 'vf_explained_var': 0.9583684206008911, 'entropy': 1.6390106678009033, 'total_loss': 0.24192661046981812, 'kl': 0.011393861845135689}, 'sample_time_ms': 46935.025, 'num_steps_sampled': 1057200, 'grad_time_ms': 366.763, 'num_steps_trained': 1057200, 'load_time_ms': 0.657, 'update_time_ms': 2.637}",881,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.027947425842285,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,121200,1057200,{},101,299,-6.96937072763734,2025-09-05_02-37-59,4.050369193835761,3651949,1757032679,3.963673689134469,36440.177138090134,102984,4.0200668896321075
+cda-server-2,False,4832.137006282806,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.5397180914878845, 'cur_kl_coeff': 1.461294412612915, 'policy_loss': -0.04076986014842987, 'vf_explained_var': 0.9104292392730713, 'entropy': 1.6930409669876099, 'total_loss': 0.5123088359832764, 'kl': 0.009142959490418434}, 'sample_time_ms': 46974.138, 'num_steps_sampled': 1058400, 'grad_time_ms': 368.194, 'num_steps_trained': 1058400, 'load_time_ms': 0.658, 'update_time_ms': 2.653}",882,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.41512179374695,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,122400,1058400,{},102,296,-4.058740189097739,2025-09-05_02-38-46,4.002076441453823,3651949,1757032726,3.9355529976656647,36487.59225988388,103280,4.043918918918919
+cda-server-2,False,4879.119877576828,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.6877517104148865, 'cur_kl_coeff': 1.461294412612915, 'policy_loss': -0.03623541444540024, 'vf_explained_var': 0.9030869603157043, 'entropy': 1.737912893295288, 'total_loss': 0.6572979688644409, 'kl': 0.003956564702093601}, 'sample_time_ms': 46971.374, 'num_steps_sampled': 1059600, 'grad_time_ms': 368.98, 'num_steps_trained': 1059600, 'load_time_ms': 0.657, 'update_time_ms': 2.64}",883,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 
'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.982871294021606,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,123600,1059600,{},103,297,-9.764382124850606,2025-09-05_02-39-33,8.000000543811925,3651949,1757032773,3.940758314162735,36534.5751311779,103577,4.043771043771044
+cda-server-2,False,4926.45415687561,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.9016821384429932, 'cur_kl_coeff': 0.7306472063064575, 'policy_loss': -0.050413258373737335, 'vf_explained_var': 0.8733535408973694, 'entropy': 1.8559746742248535, 'total_loss': 0.8578717708587646, 'kl': 0.009037166833877563}, 'sample_time_ms': 47000.169, 'num_steps_sampled': 1060800, 'grad_time_ms': 370.958, 'num_steps_trained': 1060800, 'load_time_ms': 0.664, 'update_time_ms': 2.621}",884,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.33427929878235,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,124800,1060800,{},104,293,-6.88334672493346,2025-09-05_02-40-20,4.1658625029783085,3651949,1757032820,3.851568891388484,36581.909410476685,103870,4.09556313993174
+cda-server-2,False,4973.314180612564,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.5732951760292053, 'cur_kl_coeff': 0.7306472063064575, 'policy_loss': -0.03666526451706886, 'vf_explained_var': 0.9228690266609192, 'entropy': 1.7589308023452759, 'total_loss': 0.544025182723999, 'kl': 0.010121528059244156}, 'sample_time_ms': 46963.043, 'num_steps_sampled': 1062000, 'grad_time_ms': 369.77, 'num_steps_trained': 1062000, 'load_time_ms': 0.665, 'update_time_ms': 2.653}",885,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.860023736953735,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,126000,1062000,{},105,295,-11.496498011761279,2025-09-05_02-41-07,4.008836288361233,3651949,1757032867,3.913771857398418,36628.76943421364,104165,4.057627118644068
+cda-server-2,False,5020.626423597336,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.5960099101066589, 'cur_kl_coeff': 0.7306472063064575, 'policy_loss': -0.036775778979063034, 'vf_explained_var': 0.9043925404548645, 'entropy': 1.6872469186782837, 'total_loss': 0.5790513753890991, 'kl': 0.027122920379042625}, 'sample_time_ms': 46744.043, 'num_steps_sampled': 1063200, 'grad_time_ms': 370.983, 'num_steps_trained': 1063200, 'load_time_ms': 0.66, 'update_time_ms': 2.614}",886,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.31224298477173,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,127200,1063200,{},106,296,-4.7433799063483235,2025-09-05_02-41-55,4.095289308937806,3651949,1757032915,3.9287165412323044,36676.08167719841,104461,4.047297297297297
+cda-server-2,False,5067.875581979752,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.4121336340904236, 'cur_kl_coeff': 1.095970869064331, 'policy_loss': -0.04310419782996178, 'vf_explained_var': 0.932977557182312, 'entropy': 1.690892219543457, 'total_loss': 0.3808630108833313, 'kl': 0.010797310620546341}, 'sample_time_ms': 46766.607, 'num_steps_sampled': 1064400, 'grad_time_ms': 373.488, 'num_steps_trained': 1064400, 'load_time_ms': 0.669, 'update_time_ms': 2.615}",887,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.24915838241577,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,128400,1064400,{},107,297,-2.0857158829012263,2025-09-05_02-42-42,4.002645196931428,3651949,1757032962,3.9592997422415044,36723.330835580826,104758,4.026936026936027
+cda-server-2,False,5115.0283489227295,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.27969759702682495, 'cur_kl_coeff': 1.095970869064331, 'policy_loss': -0.03837059065699577, 'vf_explained_var': 0.9554041624069214, 'entropy': 1.6551342010498047, 'total_loss': 0.24782387912273407, 'kl': 0.005927965976297855}, 'sample_time_ms': 46737.826, 'num_steps_sampled': 1065600, 'grad_time_ms': 373.007, 'num_steps_trained': 1065600, 'load_time_ms': 0.668, 'update_time_ms': 2.641}",888,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.152766942977905,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,129600,1065600,{},108,299,-4.134992348498914,2025-09-05_02-43-29,4.11362182172817,3651949,1757033009,3.943352652712832,36770.483602523804,105057,4.036789297658863
+cda-server-2,False,5162.040598630905,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.6938936114311218, 'cur_kl_coeff': 1.095970869064331, 'policy_loss': -0.037294093519449234, 'vf_explained_var': 0.8946120142936707, 'entropy': 1.740850567817688, 'total_loss': 0.6664432287216187, 'kl': 0.008981702849268913}, 'sample_time_ms': 46733.195, 'num_steps_sampled': 1066800, 'grad_time_ms': 372.173, 'num_steps_trained': 1066800, 'load_time_ms': 0.669, 'update_time_ms': 2.666}",889,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.01224970817566,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,130800,1066800,{},109,297,-6.7087742382483775,2025-09-05_02-44-16,4.020778264405768,3651949,1757033056,3.929619645125384,36817.49585223198,105354,4.047138047138047
+cda-server-2,False,5209.26691365242,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.899989128112793, 'cur_kl_coeff': 1.095970869064331, 'policy_loss': -0.03782425820827484, 'vf_explained_var': 0.8698816895484924, 'entropy': 1.7120951414108276, 'total_loss': 0.8721082210540771, 'kl': 0.009072682820260525}, 'sample_time_ms': 46776.242, 'num_steps_sampled': 1068000, 'grad_time_ms': 371.55, 'num_steps_trained': 1068000, 'load_time_ms': 0.677, 'update_time_ms': 2.649}",890,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 
'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.22631502151489,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,132000,1068000,{},110,297,-10.053639210287667,2025-09-05_02-45-03,4.266469976934626,3651949,1757033103,3.9336813886014497,36864.722167253494,105651,4.040404040404041
+cda-server-2,False,5256.301217556,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 2.759617328643799, 'cur_kl_coeff': 1.095970869064331, 'policy_loss': -0.046280644834041595, 'vf_explained_var': 0.7130245566368103, 'entropy': 1.799234390258789, 'total_loss': 2.723513603210449, 'kl': 0.009285876527428627}, 'sample_time_ms': 46776.765, 'num_steps_sampled': 1069200, 'grad_time_ms': 371.702, 'num_steps_trained': 1069200, 'load_time_ms': 0.671, 'update_time_ms': 2.627}",891,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 
'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.03430390357971,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,133200,1069200,{},111,291,-18.307916490467935,2025-09-05_02-45-51,4.002604687615128,3651949,1757033151,3.8079124917476403,36911.756471157074,105942,4.123711340206185
+cda-server-2,False,5303.68047785759,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.4721151888370514, 'cur_kl_coeff': 1.095970869064331, 'policy_loss': -0.04699081927537918, 'vf_explained_var': 0.9227263331413269, 'entropy': 1.6817357540130615, 'total_loss': 0.43441906571388245, 'kl': 0.008480795659124851}, 'sample_time_ms': 46772.906, 'num_steps_sampled': 1070400, 'grad_time_ms': 372.005, 'num_steps_trained': 1070400, 'load_time_ms': 0.673, 'update_time_ms': 2.595}",892,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.379260301589966,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,134400,1070400,{},112,297,-6.038535520083222,2025-09-05_02-46-38,4.135109194132127,3651949,1757033198,3.946062820473973,36959.135731458664,106239,4.033670033670034
+cda-server-2,False,5350.6843984127045,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 1.7970614433288574, 'cur_kl_coeff': 1.095970869064331, 'policy_loss': -0.0522477813065052, 'vf_explained_var': 0.7884857654571533, 'entropy': 1.8698594570159912, 'total_loss': 1.7521368265151978, 'kl': 0.006681882310658693}, 'sample_time_ms': 46775.321, 'num_steps_sampled': 1071600, 'grad_time_ms': 371.68, 'num_steps_trained': 1071600, 'load_time_ms': 0.672, 'update_time_ms': 2.618}",893,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.003920555114746,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,135600,1071600,{},113,291,-13.745583807755729,2025-09-05_02-47-25,4.416917507335905,3651949,1757033245,3.8267432889839528,37006.13965201378,106530,4.116838487972508
+cda-server-2,False,5397.945260763168,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 2.713960886001587, 'cur_kl_coeff': 1.095970869064331, 'policy_loss': -0.03935150429606438, 'vf_explained_var': 0.7668807506561279, 'entropy': 1.818472146987915, 'total_loss': 2.6847376823425293, 'kl': 0.009241162799298763}, 'sample_time_ms': 46766.398, 'num_steps_sampled': 1072800, 'grad_time_ms': 373.22, 'num_steps_trained': 1072800, 'load_time_ms': 0.686, 'update_time_ms': 2.648}",894,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 
'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.26086235046387,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,136800,1072800,{},114,294,-17.249778558549597,2025-09-05_02-48-12,4.163614829428042,3651949,1757033292,3.8594631902865277,37053.40051436424,106824,4.091836734693878
+cda-server-2,False,5445.086297273636,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.10369556397199631, 'cur_kl_coeff': 1.095970869064331, 'policy_loss': -0.02356908842921257, 'vf_explained_var': 0.9814754128456116, 'entropy': 1.6760879755020142, 'total_loss': 0.09424015879631042, 'kl': 0.012877783738076687}, 'sample_time_ms': 46792.359, 'num_steps_sampled': 1074000, 'grad_time_ms': 375.339, 'num_steps_trained': 1074000, 'load_time_ms': 0.686, 'update_time_ms': 2.676}",895,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.14103651046753,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,138000,1074000,{},115,299,-0.031555037362107186,2025-09-05_02-48-59,4.191379142086214,3651949,1757033339,3.9842481849621962,37100.54155087471,107123,4.013377926421405
+cda-server-2,False,5491.909555435181,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.5763661861419678, 'cur_kl_coeff': 1.095970869064331, 'policy_loss': -0.031372055411338806, 'vf_explained_var': 0.9221466779708862, 'entropy': 1.6882249116897583, 'total_loss': 0.574187159538269, 'kl': 0.026636656373739243}, 'sample_time_ms': 46743.517, 'num_steps_sampled': 1075200, 'grad_time_ms': 375.275, 'num_steps_trained': 1075200, 'load_time_ms': 0.686, 'update_time_ms': 2.677}",896,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.8232581615448,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,139200,1075200,{},116,297,-9.979543017466682,2025-09-05_02-49-46,4.161848498952017,3651949,1757033386,3.9476482385784197,37147.364809036255,107420,4.037037037037037
+cda-server-2,False,5539.260118484497,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.49259281158447266, 'cur_kl_coeff': 1.6439563035964966, 'policy_loss': -0.033091820776462555, 'vf_explained_var': 0.9307975769042969, 'entropy': 1.7111220359802246, 'total_loss': 0.47319602966308594, 'kl': 0.008330505341291428}, 'sample_time_ms': 46754.082, 'num_steps_sampled': 1076400, 'grad_time_ms': 374.842, 'num_steps_trained': 1076400, 'load_time_ms': 0.681, 'update_time_ms': 2.689}",897,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.350563049316406,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,140400,1076400,{},117,298,-7.166350155397293,2025-09-05_02-50-34,4.0038304862762,3651949,1757033434,3.9424373322039,37194.71537208557,107718,4.0369127516778525
+cda-server-2,False,5586.411970376968,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 3.5017998218536377, 'cur_kl_coeff': 1.6439563035964966, 'policy_loss': -0.039462730288505554, 'vf_explained_var': 0.7700036764144897, 'entropy': 1.814970850944519, 'total_loss': 3.46822190284729, 'kl': 0.003579681972041726}, 'sample_time_ms': 46754.347, 'num_steps_sampled': 1077600, 'grad_time_ms': 374.398, 'num_steps_trained': 1077600, 'load_time_ms': 0.683, 'update_time_ms': 2.696}",898,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.15185189247131,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,141600,1077600,{},118,291,-25.087125731798764,2025-09-05_02-51-21,4.001346690610941,3651949,1757033481,3.8632689902371355,37241.86722397804,108009,4.0893470790378
+cda-server-2,False,5634.371858358383,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.32681289315223694, 'cur_kl_coeff': 0.8219781517982483, 'policy_loss': -0.054572828114032745, 'vf_explained_var': 0.9426939487457275, 'entropy': 1.5868582725524902, 'total_loss': 0.2952921986579895, 'kl': 0.02804473787546158}, 'sample_time_ms': 46849.064, 'num_steps_sampled': 1078800, 'grad_time_ms': 374.373, 'num_steps_trained': 1078800, 'load_time_ms': 0.672, 'update_time_ms': 2.733}",899,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.959887981414795,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,142800,1078800,{},119,299,-2.2536769498871116,2025-09-05_02-52-09,4.002123115679182,3651949,1757033529,3.931380586483746,37289.82711195946,108308,4.050167224080267
+cda-server-2,False,5681.578128814697,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.4795079827308655, 'cur_kl_coeff': 1.2329672574996948, 'policy_loss': -0.0424935556948185, 'vf_explained_var': 0.9212221503257751, 'entropy': 1.77272629737854, 'total_loss': 0.45257535576820374, 'kl': 0.012620753608644009}, 'sample_time_ms': 46846.575, 'num_steps_sampled': 1080000, 'grad_time_ms': 374.849, 'num_steps_trained': 1080000, 'load_time_ms': 0.667, 'update_time_ms': 2.751}",900,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.20627045631409,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,144000,1080000,{},120,296,-2.988042588931627,2025-09-05_02-52-56,4.000800762474745,3651949,1757033576,3.9459430751420728,37337.03338241577,108604,4.04054054054054
+cda-server-2,False,5728.386974811554,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.6655850410461426, 'cur_kl_coeff': 1.2329672574996948, 'policy_loss': -0.037903402000665665, 'vf_explained_var': 0.9012076258659363, 'entropy': 1.761465311050415, 'total_loss': 0.6372129917144775, 'kl': 0.007730389013886452}, 'sample_time_ms': 46821.314, 'num_steps_sampled': 1081200, 'grad_time_ms': 377.563, 'num_steps_trained': 1081200, 'load_time_ms': 0.676, 'update_time_ms': 2.716}",901,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.80884599685669,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,145200,1081200,{},121,297,-6.174561097195088,2025-09-05_02-53-43,4.089252805379677,3651949,1757033623,3.92688879717915,37383.84222841263,108901,4.047138047138047
+cda-server-2,False,5775.551279306412,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.171039879322052, 'cur_kl_coeff': 1.2329672574996948, 'policy_loss': -0.04213587939739227, 'vf_explained_var': 0.9681374430656433, 'entropy': 1.6764439344406128, 'total_loss': 0.14072200655937195, 'kl': 0.009585012681782246}, 'sample_time_ms': 46799.281, 'num_steps_sampled': 1082400, 'grad_time_ms': 378.052, 'num_steps_trained': 1082400, 'load_time_ms': 0.675, 'update_time_ms': 2.78}",902,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.16430449485779,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,146400,1082400,{},122,297,-4.03311021608916,2025-09-05_02-54-30,4.13466924484949,3651949,1757033670,3.9496003397005945,37431.006532907486,109198,4.033670033670034
+cda-server-2,False,5822.614198207855,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.09765538573265076, 'cur_kl_coeff': 1.2329672574996948, 'policy_loss': -0.029740571975708008, 'vf_explained_var': 0.9826189875602722, 'entropy': 1.6684578657150269, 'total_loss': 0.08354974538087845, 'kl': 0.012680732645094395}, 'sample_time_ms': 46806.342, 'num_steps_sampled': 1083600, 'grad_time_ms': 376.857, 'num_steps_trained': 1083600, 'load_time_ms': 0.673, 'update_time_ms': 2.758}",903,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.06291890144348,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,147600,1083600,{},123,299,4.000084782639999,2025-09-05_02-55-17,4.166484561267723,3651949,1757033717,4.000754760716125,37478.06945180893,109497,4.0
+cda-server-2,False,5870.258234739304,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 1.1872496604919434, 'cur_kl_coeff': 1.2329672574996948, 'policy_loss': -0.03866337984800339, 'vf_explained_var': 0.8462570905685425, 'entropy': 1.7076233625411987, 'total_loss': 1.1553927659988403, 'kl': 0.0055202278308570385}, 'sample_time_ms': 46846.239, 'num_steps_sampled': 1084800, 'grad_time_ms': 375.242, 'num_steps_trained': 1084800, 'load_time_ms': 0.67, 'update_time_ms': 2.783}",904,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.644036531448364,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,148800,1084800,{},124,295,-11.222962823557705,2025-09-05_02-56-05,4.50534781335083,3651949,1757033765,3.8742699562983804,37525.71348834038,109792,4.084745762711864
+cda-server-2,False,5918.874381065369,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.348245233297348, 'cur_kl_coeff': 1.2329672574996948, 'policy_loss': -0.037835970520973206, 'vf_explained_var': 0.944329023361206, 'entropy': 1.6153223514556885, 'total_loss': 0.32013019919395447, 'kl': 0.007884159684181213}, 'sample_time_ms': 46993.235, 'num_steps_sampled': 1086000, 'grad_time_ms': 375.657, 'num_steps_trained': 1086000, 'load_time_ms': 0.681, 'update_time_ms': 2.781}",905,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",48.61614632606506,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,150000,1086000,{},125,297,-6.536665427125055,2025-09-05_02-56-53,4.001335252121653,3651949,1757033813,3.941152580223552,37574.32963466644,110089,4.037037037037037
+cda-server-2,False,5965.760763883591,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.08283431082963943, 'cur_kl_coeff': 1.2329672574996948, 'policy_loss': -0.042511459439992905, 'vf_explained_var': 0.9853301644325256, 'entropy': 1.63007390499115, 'total_loss': 0.05353569611907005, 'kl': 0.010716300457715988}, 'sample_time_ms': 46998.929, 'num_steps_sampled': 1087200, 'grad_time_ms': 376.187, 'num_steps_trained': 1087200, 'load_time_ms': 0.682, 'update_time_ms': 2.841}",906,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.886382818222046,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,151200,1087200,{},126,299,-0.04245865820154471,2025-09-05_02-57-40,4.00143553875155,3651949,1757033860,3.986681393814414,37621.216017484665,110388,4.010033444816053
+cda-server-2,False,6013.07399225235,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.0032169807236641645, 'cur_kl_coeff': 1.2329672574996948, 'policy_loss': -0.1030496209859848, 'vf_explained_var': 0.9994156956672668, 'entropy': 1.659559965133667, 'total_loss': -0.0704251229763031, 'kl': 0.023851003497838974}, 'sample_time_ms': 46995.559, 'num_steps_sampled': 1088400, 'grad_time_ms': 375.787, 'num_steps_trained': 1088400, 'load_time_ms': 0.69, 'update_time_ms': 2.86}",907,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.313228368759155,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,152400,1088400,{},127,300,4.000079229778374,2025-09-05_02-58-28,4.155390242731852,3651949,1757033908,4.000726101986216,37668.529245853424,110688,4.0
+cda-server-2,False,6060.871701717377,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.8205611705780029, 'cur_kl_coeff': 1.8494508266448975, 'policy_loss': -0.05103403329849243, 'vf_explained_var': 0.872207760810852, 'entropy': 1.6825008392333984, 'total_loss': 0.7823768854141235, 'kl': 0.0069478172808885574}, 'sample_time_ms': 47059.525, 'num_steps_sampled': 1089600, 'grad_time_ms': 376.45, 'num_steps_trained': 1089600, 'load_time_ms': 0.692, 'update_time_ms': 2.842}",908,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.797709465026855,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,153600,1089600,{},128,295,-8.109032572593708,2025-09-05_02-59-15,4.002247635056653,3651949,1757033955,3.8875018287193615,37716.32695531845,110983,4.074576271186441
+cda-server-2,False,6107.252467870712,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 1.3974435329437256, 'cur_kl_coeff': 1.8494508266448975, 'policy_loss': -0.03511720895767212, 'vf_explained_var': 0.8257991075515747, 'entropy': 1.7969785928726196, 'total_loss': 1.378829836845398, 'kl': 0.008923310786485672}, 'sample_time_ms': 46902.404, 'num_steps_sampled': 1090800, 'grad_time_ms': 375.794, 'num_steps_trained': 1090800, 'load_time_ms': 0.693, 'update_time_ms': 2.761}",909,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.38076615333557,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,154800,1090800,{},129,292,-10.445097759987831,2025-09-05_03-00-02,4.002368141152433,3651949,1757034002,3.8083283991034387,37762.70772147179,111275,4.11986301369863
+cda-server-2,False,6154.848546504974,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.0062546562403440475, 'cur_kl_coeff': 1.8494508266448975, 'policy_loss': -0.09956976026296616, 'vf_explained_var': 0.9988368153572083, 'entropy': 1.6119977235794067, 'total_loss': -0.05805457383394241, 'kl': 0.019065406173467636}, 'sample_time_ms': 46938.898, 'num_steps_sampled': 1092000, 'grad_time_ms': 378.177, 'num_steps_trained': 1092000, 'load_time_ms': 0.732, 'update_time_ms': 2.732}",910,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 
'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.596078634262085,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,156000,1092000,{},130,300,4.000071359967125,2025-09-05_03-00-49,4.001047534662767,3651949,1757034049,4.000205901437618,37810.30380010605,111575,4.0
+cda-server-2,False,6202.097699642181,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.6251590251922607, 'cur_kl_coeff': 1.8494508266448975, 'policy_loss': -0.028948571532964706, 'vf_explained_var': 0.9052460193634033, 'entropy': 1.7085204124450684, 'total_loss': 0.6010306477546692, 'kl': 0.002606305293738842}, 'sample_time_ms': 46983.395, 'num_steps_sampled': 1093200, 'grad_time_ms': 377.549, 'num_steps_trained': 1093200, 'load_time_ms': 0.722, 'update_time_ms': 2.873}",911,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.24915313720703,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,157200,1093200,{},131,298,-8.681836160411386,2025-09-05_03-01-37,4.4167532534090475,3651949,1757034097,3.96016927547165,37857.552953243256,111873,4.026845637583893
+cda-server-2,False,6249.153947591782,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.6695041656494141, 'cur_kl_coeff': 0.9247254133224487, 'policy_loss': -0.039134785532951355, 'vf_explained_var': 0.90013188123703, 'entropy': 1.6588191986083984, 'total_loss': 0.6479641199111938, 'kl': 0.019026966765522957}, 'sample_time_ms': 46974.381, 'num_steps_sampled': 1094400, 'grad_time_ms': 375.768, 'num_steps_trained': 1094400, 'load_time_ms': 0.727, 'update_time_ms': 2.839}",912,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.05624794960022,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,158400,1094400,{},132,296,-5.102389635603522,2025-09-05_03-02-24,4.244200512234446,3651949,1757034144,3.9565869946037306,37904.609201192856,112169,4.030405405405405
+cda-server-2,False,6296.295668840408,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.008423120714724064, 'cur_kl_coeff': 0.9247254133224487, 'policy_loss': -0.047248583287000656, 'vf_explained_var': 0.9983489513397217, 'entropy': 1.6233975887298584, 'total_loss': -0.02756035327911377, 'kl': 0.012182105332612991}, 'sample_time_ms': 46984.165, 'num_steps_sampled': 1095600, 'grad_time_ms': 373.939, 'num_steps_trained': 1095600, 'load_time_ms': 0.718, 'update_time_ms': 2.846}",913,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 
'postprocess_inputs': False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.14172124862671,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,159600,1095600,{},133,300,-4.0368726176782985,2025-09-05_03-03-11,4.000725931831873,3651949,1757034191,3.973399068350616,37951.75092244148,112469,4.016666666666667
+cda-server-2,False,6343.30012345314,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.552894651889801, 'cur_kl_coeff': 0.9247254133224487, 'policy_loss': -0.036006052047014236, 'vf_explained_var': 0.931046187877655, 'entropy': 1.7760496139526367, 'total_loss': 0.5230053067207336, 'kl': 0.006614684127271175}, 'sample_time_ms': 46919.318, 'num_steps_sampled': 1096800, 'grad_time_ms': 374.83, 'num_steps_trained': 1096800, 'load_time_ms': 0.722, 'update_time_ms': 2.801}",914,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.004454612731934,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,160800,1096800,{},134,296,-6.024787100581303,2025-09-05_03-03-58,4.002575908744511,3651949,1757034238,3.9198022842876283,37998.755377054214,112765,4.050675675675675
+cda-server-2,False,6390.3256759643555,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.2663804292678833, 'cur_kl_coeff': 0.9247254133224487, 'policy_loss': -0.029532097280025482, 'vf_explained_var': 0.9555729627609253, 'entropy': 1.5449742078781128, 'total_loss': 0.24412457644939423, 'kl': 0.007868574000895023}, 'sample_time_ms': 46760.09, 'num_steps_sampled': 1098000, 'grad_time_ms': 375.112, 'num_steps_trained': 1098000, 'load_time_ms': 0.71, 'update_time_ms': 2.769}",915,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.02555251121521,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,162000,1098000,{},135,298,-4.096244807617365,2025-09-05_03-04-45,4.005924476954452,3651949,1757034285,3.9597168225653534,38045.78092956543,113063,4.023489932885906
+cda-server-2,False,6437.271858453751,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.0021694996394217014, 'cur_kl_coeff': 0.9247254133224487, 'policy_loss': -0.1111738532781601, 'vf_explained_var': 0.9995922446250916, 'entropy': 1.6540554761886597, 'total_loss': -0.08195843547582626, 'kl': 0.029247526079416275}, 'sample_time_ms': 46765.871, 'num_steps_sampled': 1099200, 'grad_time_ms': 375.423, 'num_steps_trained': 1099200, 'load_time_ms': 0.706, 'update_time_ms': 2.72}",916,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.94618248939514,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,163200,1099200,{},136,300,4.000068956729939,2025-09-05_03-05-32,4.306151855380639,3651949,1757034332,4.001476746470644,38092.727112054825,113363,4.0
+cda-server-2,False,6484.297063112259,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 0.5783279538154602, 'cur_kl_coeff': 1.3870880603790283, 'policy_loss': -0.03263545408844948, 'vf_explained_var': 0.9230349659919739, 'entropy': 1.7275382280349731, 'total_loss': 0.549310564994812, 'kl': 0.002608383074402809}, 'sample_time_ms': 46738.849, 'num_steps_sampled': 1100400, 'grad_time_ms': 373.732, 'num_steps_trained': 1100400, 'load_time_ms': 0.693, 'update_time_ms': 2.686}",917,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",47.0252046585083,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,164400,1100400,{},137,297,-11.673738478321681,2025-09-05_03-06-19,4.002616083422177,3651949,1757034379,3.9404715761026687,38139.75231671333,113660,4.047138047138047
+cda-server-2,False,6531.163638830185,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 1.669813632965088, 'cur_kl_coeff': 0.6935440301895142, 'policy_loss': -0.0387318879365921, 'vf_explained_var': 0.799774706363678, 'entropy': 1.750069499015808, 'total_loss': 1.6407551765441895, 'kl': 0.013947629369795322}, 'sample_time_ms': 46646.563, 'num_steps_sampled': 1101600, 'grad_time_ms': 372.928, 'num_steps_trained': 1101600, 'load_time_ms': 0.688, 'update_time_ms': 2.681}",918,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': False, 
'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.866575717926025,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,165600,1101600,{},138,295,-13.61323078722381,2025-09-05_03-07-06,4.001927960708255,3651949,1757034426,3.8888666033676937,38186.61889243126,113955,4.074576271186441
+cda-server-2,False,6577.984277009964,"{'default': {'cur_lr': 4.999999873689376e-05, 'vf_loss': 2.418276786804199, 'cur_kl_coeff': 0.6935440301895142, 'policy_loss': -0.046036407351493835, 'vf_explained_var': 0.7593923211097717, 'entropy': 1.7718133926391602, 'total_loss': 2.378685235977173, 'kl': 0.009292426519095898}, 'sample_time_ms': 46687.135, 'num_steps_sampled': 1102800, 'grad_time_ms': 376.168, 'num_steps_trained': 1102800, 'load_time_ms': 0.711, 'update_time_ms': 2.739}",919,"{'clip_actions': True, 'vf_share_layers': False, 'entropy_coeff': 0.0, 'clip_param': 0.3, 'lr': 5e-05, 'monitor': False, 'observation_filter': 'MeanStdFilter', 'input': 'sampler', 'sample_async': False, 'multiagent': {'policies_to_train': None, 'policy_graphs': {}, 'policy_mapping_fn': None}, 'tf_session_args': {'device_count': {'CPU': 1}, 'intra_op_parallelism_threads': 2, 'log_device_placement': False, 'allow_soft_placement': True, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}}, 'collect_metrics_timeout': 180, 'synchronize_filters': True, 'sgd_minibatch_size': 128, 'output': None, 'use_gae': True, 'num_gpus_per_worker': 0, 'batch_mode': 'truncate_episodes', 'compress_observations': False, 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8, 'intra_op_parallelism_threads': 8}, 'custom_resources_per_worker': {}, 'num_sgd_iter': 30, 'simple_optimizer': False, 'num_workers': 3, 'output_max_file_size': 67108864, 'input_evaluation': None, 'num_envs_per_worker': 1, 'preprocessor_pref': 'deepmind', 'callbacks': {'on_train_result': None, 'on_episode_step': None, 'on_sample_end': None, 'on_episode_end': None, 'on_episode_start': None}, 'num_gpus': 0, 'straggler_mitigation': False, 'grad_clip': None, 'output_compress_columns': ['obs', 'new_obs'], 'kl_target': 0.01, 'vf_loss_coeff': 1.0, 'num_cpus_for_driver': 1, 'num_cpus_per_worker': 1, 'env': 'Zhenxin_S_FC', 'lr_schedule': None, 'vf_clip_param': 10.0, 'log_level': 'INFO', 'postprocess_inputs': 
False, 'optimizer': {}, 'model': {'use_lstm': False, 'dim': 84, 'max_seq_len': 20, 'fcnet_hiddens': [128, 128, 128], 'zero_mean': True, 'custom_preprocessor': None, 'grayscale': False, 'squash_to_range': False, 'lstm_cell_size': 256, 'conv_activation': 'relu', 'conv_filters': None, 'lstm_use_prev_action_reward': False, 'free_log_std': False, 'framestack': True, 'custom_model': None, 'custom_options': {}, 'fcnet_activation': 'tanh'}, 'env_config': {'generalize': False, 'run_valid': False}, 'horizon': 50, 'gamma': 0.99, 'clip_rewards': None, 'kl_coeff': 0.2, 'sample_batch_size': 200, 'lambda': 1.0, 'train_batch_size': 1200}",46.82063817977905,0,{},881ce36181fe42dabe29289bda5f7577,10.157.146.2,1200,166800,1102800,{},139,293,-19.70253313989107,2025-09-05_03-07-53,4.198803029835787,3651949,1757034473,3.877643137113445,38233.43953061104,114248,4.081911262798635
diff --git a/experiments/optimize-Zhenxin_S_FC_65nmPTM-run14/PPO_Zhenxin_S_FC_0_2025-09-04_16-10-519x116nc6/result.json b/experiments/optimize-Zhenxin_S_FC_65nmPTM-run14/PPO_Zhenxin_S_FC_0_2025-09-04_16-10-519x116nc6/result.json
new file mode 100644
index 0000000..273688e
--- /dev/null
+++ b/experiments/optimize-Zhenxin_S_FC_65nmPTM-run14/PPO_Zhenxin_S_FC_0_2025-09-04_16-10-519x116nc6/result.json
@@ -0,0 +1,940 @@
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 140.99133276939392, "info": {"sample_time_ms": 139859.504, "num_steps_trained": 1200, "grad_time_ms": 653.542, "default": {"cur_kl_coeff": 0.20000000298023224, "vf_loss": 2231.562255859375, "policy_loss": -0.11362738162279129, "vf_explained_var": -0.0003447002964094281, "entropy": 15.597346305847168, "cur_lr": 4.999999873689376e-05, "total_loss": 2231.454833984375, "kl": 0.03053244948387146}, "load_time_ms": 27.299, "num_steps_sampled": 1200, "update_time_ms": 416.916}, "training_iteration": 1, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 140.99133276939392, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 1200, "timesteps_total": 1200, "custom_metrics": {}, "iterations_since_restore": 1, "episodes_this_iter": 24, "episode_reward_min": -100.0666019790363, "date": "2025-09-04_16-15-07", "episode_reward_max": -93.22332074316793, "pid": 3651948, "timestamp": 1756995307, "episode_reward_mean": -97.30795660981228, "time_total_s": 140.99133276939392, "episodes_total": 24, "episode_len_mean": 50.0}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 187.5420961380005, "info": {"sample_time_ms": 92992.711, "num_steps_trained": 2400, "grad_time_ms": 534.807, "default": {"cur_kl_coeff": 0.30000001192092896, "vf_loss": 2008.263427734375, "policy_loss": -0.11293138563632965, "vf_explained_var": -0.0319129154086113, "entropy": 15.605307579040527, "cur_lr": 4.999999873689376e-05, "total_loss": 2008.15966796875, "kl": 0.030827680602669716}, "load_time_ms": 14.011, "num_steps_sampled": 2400, "update_time_ms": 209.994}, "training_iteration": 2, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 46.55076336860657, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 2400, "timesteps_total": 2400, "custom_metrics": {}, "iterations_since_restore": 2, "episodes_this_iter": 24, "episode_reward_min": -100.0666019790363, "date": "2025-09-04_16-15-53", "episode_reward_max": -93.22332074316793, "pid": 3651948, "timestamp": 1756995353, "episode_reward_mean": -97.6085290053284, "time_total_s": 187.5420961380005, "episodes_total": 48, "episode_len_mean": 50.0}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 248.49070477485657, "info": {"sample_time_ms": 82189.195, "num_steps_trained": 3600, "grad_time_ms": 475.83, "default": {"cur_kl_coeff": 0.44999995827674866, "vf_loss": 1854.1104736328125, "policy_loss": -0.10487513989210129, "vf_explained_var": -0.018647870048880577, "entropy": 15.596461296081543, "cur_lr": 4.999999873689376e-05, "total_loss": 1854.0181884765625, "kl": 0.02738937921822071}, "load_time_ms": 9.619, "num_steps_sampled": 3600, "update_time_ms": 140.997}, "training_iteration": 3, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 60.94860863685608, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 3600, "timesteps_total": 3600, "custom_metrics": {}, "iterations_since_restore": 3, "episodes_this_iter": 24, "episode_reward_min": -100.0666019790363, "date": "2025-09-04_16-16-54", "episode_reward_max": -93.22332074316793, "pid": 3651948, "timestamp": 1756995414, "episode_reward_mean": -97.54624563833285, "time_total_s": 248.49070477485657, "episodes_total": 72, "episode_len_mean": 50.0}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 289.3633248806, "info": {"sample_time_ms": 71768.578, "num_steps_trained": 4800, "grad_time_ms": 446.461, "default": {"cur_kl_coeff": 0.675000011920929, "vf_loss": 1733.2108154296875, "policy_loss": -0.11116102337837219, "vf_explained_var": -0.05004839599132538, "entropy": 15.587655067443848, "cur_lr": 4.999999873689376e-05, "total_loss": 1733.1148681640625, "kl": 0.022528911009430885}, "load_time_ms": 7.375, "num_steps_sampled": 4800, "update_time_ms": 106.338}, "training_iteration": 4, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.87262010574341, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 4800, "timesteps_total": 4800, "custom_metrics": {}, "iterations_since_restore": 4, "episodes_this_iter": 24, "episode_reward_min": -100.0666019790363, "date": "2025-09-04_16-17-35", "episode_reward_max": -91.89653622755112, "pid": 3651948, "timestamp": 1756995455, "episode_reward_mean": -97.5726961111477, "time_total_s": 289.3633248806, "episodes_total": 96, "episode_len_mean": 50.0}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 329.5035173892975, "info": {"sample_time_ms": 65366.457, "num_steps_trained": 6000, "grad_time_ms": 432.08, "default": {"cur_kl_coeff": 1.0125000476837158, "vf_loss": 1653.20263671875, "policy_loss": -0.10911934822797775, "vf_explained_var": -0.07681050896644592, "entropy": 15.577970504760742, "cur_lr": 4.999999873689376e-05, "total_loss": 1653.112548828125, "kl": 0.01889631897211075}, "load_time_ms": 6.065, "num_steps_sampled": 6000, "update_time_ms": 85.553}, "training_iteration": 5, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.14019250869751, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 6000, "timesteps_total": 6000, "custom_metrics": {}, "iterations_since_restore": 5, "episodes_this_iter": 24, "episode_reward_min": -99.99385424763929, "date": "2025-09-04_16-18-15", "episode_reward_max": -91.89653622755112, "pid": 3651948, "timestamp": 1756995495, "episode_reward_mean": -97.58844576213276, "time_total_s": 329.5035173892975, "episodes_total": 120, "episode_len_mean": 50.0}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 369.60118436813354, "info": {"sample_time_ms": 61089.625, "num_steps_trained": 7200, "grad_time_ms": 424.118, "default": {"cur_kl_coeff": 1.0125000476837158, "vf_loss": 1622.42236328125, "policy_loss": -0.0988093689084053, "vf_explained_var": -0.13713043928146362, "entropy": 15.560051918029785, "cur_lr": 4.999999873689376e-05, "total_loss": 1622.34228515625, "kl": 0.018548818305134773}, "load_time_ms": 5.173, "num_steps_sampled": 7200, "update_time_ms": 71.689}, "training_iteration": 6, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.09766697883606, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 7200, "timesteps_total": 7200, "custom_metrics": {}, "iterations_since_restore": 6, "episodes_this_iter": 24, "episode_reward_min": -99.99385424763929, "date": "2025-09-04_16-18-55", "episode_reward_max": -91.89653622755112, "pid": 3651948, "timestamp": 1756995535, "episode_reward_mean": -97.54368201093162, "time_total_s": 369.60118436813354, "episodes_total": 144, "episode_len_mean": 50.0}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 409.36658096313477, "info": {"sample_time_ms": 57991.308, "num_steps_trained": 8400, "grad_time_ms": 414.365, "default": {"cur_kl_coeff": 1.0125000476837158, "vf_loss": 1449.89404296875, "policy_loss": -0.10638123005628586, "vf_explained_var": -0.13925179839134216, "entropy": 15.54902172088623, "cur_lr": 4.999999873689376e-05, "total_loss": 1449.80712890625, "kl": 0.019342221319675446}, "load_time_ms": 4.528, "num_steps_sampled": 8400, "update_time_ms": 61.871}, "training_iteration": 7, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.76539659500122, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 8400, "timesteps_total": 8400, "custom_metrics": {}, "iterations_since_restore": 7, "episodes_this_iter": 24, "episode_reward_min": -99.99385424763929, "date": "2025-09-04_16-19-35", "episode_reward_max": -30.71669919267596, "pid": 3651948, "timestamp": 1756995575, "episode_reward_mean": -96.51648214196463, "time_total_s": 409.36658096313477, "episodes_total": 168, "episode_len_mean": 49.64}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 449.37567710876465, "info": {"sample_time_ms": 55695.786, "num_steps_trained": 9600, "grad_time_ms": 409.353, "default": {"cur_kl_coeff": 1.0125000476837158, "vf_loss": 1406.4390869140625, "policy_loss": -0.10367625206708908, "vf_explained_var": -0.18408912420272827, "entropy": 15.551528930664062, "cur_lr": 4.999999873689376e-05, "total_loss": 1406.35302734375, "kl": 0.01746782474219799}, "load_time_ms": 4.044, "num_steps_sampled": 9600, "update_time_ms": 54.458}, "training_iteration": 8, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.00909614562988, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 9600, "timesteps_total": 9600, "custom_metrics": {}, "iterations_since_restore": 8, "episodes_this_iter": 24, "episode_reward_min": -99.89664753970594, "date": "2025-09-04_16-20-15", "episode_reward_max": -24.47619018840004, "pid": 3651948, "timestamp": 1756995615, "episode_reward_mean": -95.89876277925154, "time_total_s": 449.37567710876465, "episodes_total": 192, "episode_len_mean": 49.33}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 490.18978786468506, "info": {"sample_time_ms": 53999.365, "num_steps_trained": 10800, "grad_time_ms": 405.921, "default": {"cur_kl_coeff": 1.0125000476837158, "vf_loss": 1334.082275390625, "policy_loss": -0.10778095573186874, "vf_explained_var": -0.20987066626548767, "entropy": 15.519222259521484, "cur_lr": 4.999999873689376e-05, "total_loss": 1333.9931640625, "kl": 0.01847856305539608}, "load_time_ms": 3.674, "num_steps_sampled": 10800, "update_time_ms": 48.693}, "training_iteration": 9, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.81411075592041, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 10800, "timesteps_total": 10800, "custom_metrics": {}, "iterations_since_restore": 9, "episodes_this_iter": 25, "episode_reward_min": -99.89664753970594, "date": "2025-09-04_16-20-56", "episode_reward_max": -23.04501059558644, "pid": 3651948, "timestamp": 1756995656, "episode_reward_mean": -95.2285475105528, "time_total_s": 490.18978786468506, "episodes_total": 217, "episode_len_mean": 49.01}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 530.0455119609833, "info": {"sample_time_ms": 52548.508, "num_steps_trained": 12000, "grad_time_ms": 401.092, "default": {"cur_kl_coeff": 1.0125000476837158, "vf_loss": 1255.589599609375, "policy_loss": -0.11320510506629944, "vf_explained_var": -0.24970334768295288, "entropy": 15.53376293182373, "cur_lr": 4.999999873689376e-05, "total_loss": 1255.4962158203125, "kl": 0.019503416493535042}, "load_time_ms": 3.373, "num_steps_sampled": 12000, "update_time_ms": 44.053}, "training_iteration": 10, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.85572409629822, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 12000, "timesteps_total": 12000, "custom_metrics": {}, "iterations_since_restore": 10, "episodes_this_iter": 24, "episode_reward_min": -99.89664753970594, "date": "2025-09-04_16-21-36", "episode_reward_max": -23.04501059558644, "pid": 3651948, "timestamp": 1756995696, "episode_reward_mean": -94.32109097779768, "time_total_s": 530.0455119609833, "episodes_total": 241, "episode_len_mean": 48.7}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 569.8694930076599, "info": {"sample_time_ms": 42508.571, "num_steps_trained": 13200, "grad_time_ms": 371.291, "default": {"cur_kl_coeff": 1.0125000476837158, "vf_loss": 1171.419677734375, "policy_loss": -0.10418149828910828, "vf_explained_var": -0.26429101824760437, "entropy": 15.501246452331543, "cur_lr": 4.999999873689376e-05, "total_loss": 1171.3345947265625, "kl": 0.01891779899597168}, "load_time_ms": 0.705, "num_steps_sampled": 13200, "update_time_ms": 2.679}, "training_iteration": 11, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.823981046676636, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 13200, "timesteps_total": 13200, "custom_metrics": {}, "iterations_since_restore": 11, "episodes_this_iter": 25, "episode_reward_min": -99.9892110402293, "date": "2025-09-04_16-22-16", "episode_reward_max": -23.04501059558644, "pid": 3651948, "timestamp": 1756995736, "episode_reward_mean": -94.70175302960016, "time_total_s": 569.8694930076599, "episodes_total": 266, "episode_len_mean": 48.81}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 609.4798724651337, "info": {"sample_time_ms": 41820.453, "num_steps_trained": 14400, "grad_time_ms": 365.48, "default": {"cur_kl_coeff": 1.0125000476837158, "vf_loss": 1192.4371337890625, "policy_loss": -0.10855650901794434, "vf_explained_var": -0.3701235353946686, "entropy": 15.489436149597168, "cur_lr": 4.999999873689376e-05, "total_loss": 1192.3475341796875, "kl": 0.018641583621501923}, "load_time_ms": 0.704, "num_steps_sampled": 14400, "update_time_ms": 2.628}, "training_iteration": 12, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.610379457473755, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 14400, "timesteps_total": 14400, "custom_metrics": {}, "iterations_since_restore": 12, "episodes_this_iter": 24, "episode_reward_min": -99.9892110402293, "date": "2025-09-04_16-22-55", "episode_reward_max": -23.04501059558644, "pid": 3651948, "timestamp": 1756995775, "episode_reward_mean": -95.53298387289084, "time_total_s": 609.4798724651337, "episodes_total": 290, "episode_len_mean": 49.13}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 649.2679927349091, "info": {"sample_time_ms": 39703.982, "num_steps_trained": 15600, "grad_time_ms": 365.999, "default": {"cur_kl_coeff": 1.0125000476837158, "vf_loss": 1188.027099609375, "policy_loss": -0.10490735620260239, "vf_explained_var": -0.46393129229545593, "entropy": 15.496283531188965, "cur_lr": 4.999999873689376e-05, "total_loss": 1187.9417724609375, "kl": 0.019278578460216522}, "load_time_ms": 0.698, "num_steps_sampled": 15600, "update_time_ms": 2.577}, "training_iteration": 13, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.78812026977539, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 15600, "timesteps_total": 15600, "custom_metrics": {}, "iterations_since_restore": 13, "episodes_this_iter": 24, "episode_reward_min": -99.9892110402293, "date": "2025-09-04_16-23-35", "episode_reward_max": -25.85968405258626, "pid": 3651948, "timestamp": 1756995815, "episode_reward_mean": -96.20017378990354, "time_total_s": 649.2679927349091, "episodes_total": 314, "episode_len_mean": 49.45}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 689.2937431335449, "info": {"sample_time_ms": 39617.616, "num_steps_trained": 16800, "grad_time_ms": 367.68, "default": {"cur_kl_coeff": 1.0125000476837158, "vf_loss": 1119.98095703125, "policy_loss": -0.1071331575512886, "vf_explained_var": -0.3960515260696411, "entropy": 15.486916542053223, "cur_lr": 4.999999873689376e-05, "total_loss": 1119.8934326171875, "kl": 0.019492844119668007}, "load_time_ms": 0.713, "num_steps_sampled": 16800, "update_time_ms": 2.57}, "training_iteration": 14, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.025750398635864, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 16800, "timesteps_total": 16800, "custom_metrics": {}, "iterations_since_restore": 14, "episodes_this_iter": 24, "episode_reward_min": -100.10138569553668, "date": "2025-09-04_16-24-15", "episode_reward_max": -38.72316905582058, "pid": 3651948, "timestamp": 1756995855, "episode_reward_mean": -97.13363621111745, "time_total_s": 689.2937431335449, "episodes_total": 338, "episode_len_mean": 49.76}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 729.1800971031189, "info": {"sample_time_ms": 39591.578, "num_steps_trained": 18000, "grad_time_ms": 368.351, "default": {"cur_kl_coeff": 1.0125000476837158, "vf_loss": 1143.7335205078125, "policy_loss": -0.10774454474449158, "vf_explained_var": -0.4819021224975586, "entropy": 15.48103141784668, "cur_lr": 4.999999873689376e-05, "total_loss": 1143.6439208984375, "kl": 0.017882168292999268}, "load_time_ms": 0.697, "num_steps_sampled": 18000, "update_time_ms": 2.57}, "training_iteration": 15, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.886353969573975, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 18000, "timesteps_total": 18000, "custom_metrics": {}, "iterations_since_restore": 15, "episodes_this_iter": 24, "episode_reward_min": -100.10138569553668, "date": "2025-09-04_16-24-55", "episode_reward_max": -92.41579714679654, "pid": 3651948, "timestamp": 1756995895, "episode_reward_mean": -97.83491767123333, "time_total_s": 729.1800971031189, "episodes_total": 362, "episode_len_mean": 50.0}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 769.293693780899, "info": {"sample_time_ms": 39595.882, "num_steps_trained": 19200, "grad_time_ms": 365.663, "default": {"cur_kl_coeff": 1.0125000476837158, "vf_loss": 1088.394287109375, "policy_loss": -0.1057095155119896, "vf_explained_var": -0.46451839804649353, "entropy": 15.46332836151123, "cur_lr": 4.999999873689376e-05, "total_loss": 1088.307373046875, "kl": 0.01861894316971302}, "load_time_ms": 0.689, "num_steps_sampled": 19200, "update_time_ms": 2.606}, "training_iteration": 16, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.11359667778015, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 19200, "timesteps_total": 19200, "custom_metrics": {}, "iterations_since_restore": 16, "episodes_this_iter": 24, "episode_reward_min": -100.10138569553668, "date": "2025-09-04_16-25-35", "episode_reward_max": -92.41579714679654, "pid": 3651948, "timestamp": 1756995935, "episode_reward_mean": -97.72628511499211, "time_total_s": 769.293693780899, "episodes_total": 386, "episode_len_mean": 50.0}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 808.9712433815002, "info": {"sample_time_ms": 39585.098, "num_steps_trained": 20400, "grad_time_ms": 367.68, "default": {"cur_kl_coeff": 1.0125000476837158, "vf_loss": 1159.6478271484375, "policy_loss": -0.12469884753227234, "vf_explained_var": -0.5814424157142639, "entropy": 15.468914031982422, "cur_lr": 4.999999873689376e-05, "total_loss": 1159.5428466796875, "kl": 0.01942109689116478}, "load_time_ms": 0.7, "num_steps_sampled": 20400, "update_time_ms": 2.561}, "training_iteration": 17, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.677549600601196, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 20400, "timesteps_total": 20400, "custom_metrics": {}, "iterations_since_restore": 17, "episodes_this_iter": 24, "episode_reward_min": -100.10138569553668, "date": "2025-09-04_16-26-15", "episode_reward_max": -94.52557691990086, "pid": 3651948, "timestamp": 1756995975, "episode_reward_mean": -97.73629758608034, "time_total_s": 808.9712433815002, "episodes_total": 410, "episode_len_mean": 50.0}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 849.0680379867554, "info": {"sample_time_ms": 39592.423, "num_steps_trained": 21600, "grad_time_ms": 369.09, "default": {"cur_kl_coeff": 1.0125000476837158, "vf_loss": 1102.488037109375, "policy_loss": -0.10093361139297485, "vf_explained_var": -0.46737515926361084, "entropy": 15.47008228302002, "cur_lr": 4.999999873689376e-05, "total_loss": 1102.40625, "kl": 0.01886645331978798}, "load_time_ms": 0.73, "num_steps_sampled": 21600, "update_time_ms": 2.575}, "training_iteration": 18, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.09679460525513, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 21600, "timesteps_total": 21600, "custom_metrics": {}, "iterations_since_restore": 18, "episodes_this_iter": 25, "episode_reward_min": -99.94909641233812, "date": "2025-09-04_16-26-55", "episode_reward_max": -1.137450634299789, "pid": 3651948, "timestamp": 1756996015, "episode_reward_mean": -95.90983970514493, "time_total_s": 849.0680379867554, "episodes_total": 435, "episode_len_mean": 49.19}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 888.7057158946991, "info": {"sample_time_ms": 39476.752, "num_steps_trained": 22800, "grad_time_ms": 367.102, "default": {"cur_kl_coeff": 1.0125000476837158, "vf_loss": 1188.67138671875, "policy_loss": -0.11527708917856216, "vf_explained_var": -0.5554392337799072, "entropy": 15.453225135803223, "cur_lr": 4.999999873689376e-05, "total_loss": 1188.5748291015625, "kl": 0.018716327846050262}, "load_time_ms": 0.722, "num_steps_sampled": 22800, "update_time_ms": 2.588}, "training_iteration": 19, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.637677907943726, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 22800, "timesteps_total": 22800, "custom_metrics": {}, "iterations_since_restore": 19, "episodes_this_iter": 24, "episode_reward_min": -99.85778078216784, "date": "2025-09-04_16-27-35", "episode_reward_max": -1.137450634299789, "pid": 3651948, "timestamp": 1756996055, "episode_reward_mean": -95.80412959682307, "time_total_s": 888.7057158946991, "episodes_total": 459, "episode_len_mean": 49.19}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 928.5402855873108, "info": {"sample_time_ms": 39473.313, "num_steps_trained": 24000, "grad_time_ms": 368.399, "default": {"cur_kl_coeff": 1.0125000476837158, "vf_loss": 1222.9095458984375, "policy_loss": -0.1030873954296112, "vf_explained_var": -0.6650868654251099, "entropy": 15.46270751953125, "cur_lr": 4.999999873689376e-05, "total_loss": 1222.82666015625, "kl": 0.019915420562028885}, "load_time_ms": 0.726, "num_steps_sampled": 24000, "update_time_ms": 2.605}, "training_iteration": 20, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.834569692611694, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 24000, "timesteps_total": 24000, "custom_metrics": {}, "iterations_since_restore": 20, "episodes_this_iter": 24, "episode_reward_min": -99.85778078216784, "date": "2025-09-04_16-28-15", "episode_reward_max": -1.137450634299789, "pid": 3651948, "timestamp": 1756996095, "episode_reward_mean": -95.87886228236576, "time_total_s": 928.5402855873108, "episodes_total": 483, "episode_len_mean": 49.19}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 968.6658818721771, "info": {"sample_time_ms": 39501.469, "num_steps_trained": 25200, "grad_time_ms": 370.435, "default": {"cur_kl_coeff": 1.0125000476837158, "vf_loss": 1268.17236328125, "policy_loss": -0.09783076494932175, "vf_explained_var": -0.7616844177246094, "entropy": 15.439361572265625, "cur_lr": 4.999999873689376e-05, "total_loss": 1268.0931396484375, "kl": 0.018327785655856133}, "load_time_ms": 0.733, "num_steps_sampled": 25200, "update_time_ms": 2.6}, "training_iteration": 21, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.12559628486633, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 25200, "timesteps_total": 25200, "custom_metrics": {}, "iterations_since_restore": 21, "episodes_this_iter": 24, "episode_reward_min": -99.86032434277038, "date": "2025-09-04_16-28-55", "episode_reward_max": -1.137450634299789, "pid": 3651948, "timestamp": 1756996135, "episode_reward_mean": -95.82387425761877, "time_total_s": 968.6658818721771, "episodes_total": 507, "episode_len_mean": 49.19}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 1008.8821487426758, "info": {"sample_time_ms": 39559.169, "num_steps_trained": 26400, "grad_time_ms": 373.318, "default": {"cur_kl_coeff": 1.0125000476837158, "vf_loss": 1154.8974609375, "policy_loss": -0.10972815752029419, "vf_explained_var": -0.6122896671295166, "entropy": 15.398881912231445, "cur_lr": 4.999999873689376e-05, "total_loss": 1154.80810546875, "kl": 0.02011170983314514}, "load_time_ms": 0.735, "num_steps_sampled": 26400, "update_time_ms": 2.583}, "training_iteration": 22, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.21626687049866, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 26400, "timesteps_total": 26400, "custom_metrics": {}, "iterations_since_restore": 22, "episodes_this_iter": 24, "episode_reward_min": -99.86032434277038, "date": "2025-09-04_16-29-35", "episode_reward_max": -90.4005844146529, "pid": 3651948, "timestamp": 1756996175, "episode_reward_mean": -97.62980122668787, "time_total_s": 1008.8821487426758, "episodes_total": 531, "episode_len_mean": 50.0}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 1048.678347826004, "info": {"sample_time_ms": 39560.576, "num_steps_trained": 27600, "grad_time_ms": 372.732, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1281.469970703125, "policy_loss": -0.10534890741109848, "vf_explained_var": -0.8017933964729309, "entropy": 15.448863983154297, "cur_lr": 4.999999873689376e-05, "total_loss": 1281.388916015625, "kl": 0.015983637422323227}, "load_time_ms": 0.726, "num_steps_sampled": 27600, "update_time_ms": 2.585}, "training_iteration": 23, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.79619908332825, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 27600, "timesteps_total": 27600, "custom_metrics": {}, "iterations_since_restore": 23, "episodes_this_iter": 24, "episode_reward_min": -99.86032434277038, "date": "2025-09-04_16-30-15", "episode_reward_max": -90.4005844146529, "pid": 3651948, "timestamp": 1756996215, "episode_reward_mean": -97.49737593284527, "time_total_s": 1048.678347826004, "episodes_total": 555, "episode_len_mean": 50.0}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 1089.2785403728485, "info": {"sample_time_ms": 39620.043, "num_steps_trained": 28800, "grad_time_ms": 370.708, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1191.23193359375, "policy_loss": -0.09724703431129456, "vf_explained_var": -0.7418419718742371, "entropy": 15.407340049743652, "cur_lr": 4.999999873689376e-05, "total_loss": 1191.15380859375, "kl": 0.01257497537881136}, "load_time_ms": 0.723, "num_steps_sampled": 28800, "update_time_ms": 2.588}, "training_iteration": 24, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.60019254684448, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 28800, "timesteps_total": 28800, "custom_metrics": {}, "iterations_since_restore": 24, "episodes_this_iter": 24, "episode_reward_min": -99.98463372714971, "date": "2025-09-04_16-30-55", "episode_reward_max": -90.4005844146529, "pid": 3651948, "timestamp": 1756996255, "episode_reward_mean": -97.50489288226183, "time_total_s": 1089.2785403728485, "episodes_total": 579, "episode_len_mean": 50.0}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 1129.0712842941284, "info": {"sample_time_ms": 39613.007, "num_steps_trained": 30000, "grad_time_ms": 368.383, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1242.32080078125, "policy_loss": -0.12498721480369568, "vf_explained_var": -0.7720822095870972, "entropy": 15.434539794921875, "cur_lr": 4.999999873689376e-05, "total_loss": 1242.2161865234375, "kl": 0.013309704139828682}, "load_time_ms": 0.727, "num_steps_sampled": 30000, "update_time_ms": 2.583}, "training_iteration": 25, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.79274392127991, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 30000, "timesteps_total": 30000, "custom_metrics": {}, "iterations_since_restore": 25, "episodes_this_iter": 24, "episode_reward_min": -100.10329485311799, "date": "2025-09-04_16-31-35", "episode_reward_max": -94.09882496122897, "pid": 3651948, "timestamp": 1756996295, "episode_reward_mean": -97.35178240898782, "time_total_s": 1129.0712842941284, "episodes_total": 603, "episode_len_mean": 50.0}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 1168.7032897472382, "info": {"sample_time_ms": 39562.756, "num_steps_trained": 31200, "grad_time_ms": 370.497, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1173.93701171875, "policy_loss": -0.10411402583122253, "vf_explained_var": -0.6004043221473694, "entropy": 15.364545822143555, "cur_lr": 4.999999873689376e-05, "total_loss": 1173.8564453125, "kl": 0.01551245991140604}, "load_time_ms": 0.725, "num_steps_sampled": 31200, "update_time_ms": 2.535}, "training_iteration": 26, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.63200545310974, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 31200, "timesteps_total": 31200, "custom_metrics": {}, "iterations_since_restore": 26, "episodes_this_iter": 24, "episode_reward_min": -100.10329485311799, "date": "2025-09-04_16-32-15", "episode_reward_max": -57.801233031301635, "pid": 3651948, "timestamp": 1756996335, "episode_reward_mean": -97.0011269918407, "time_total_s": 1168.7032897472382, "episodes_total": 627, "episode_len_mean": 49.86}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 1208.4105989933014, "info": {"sample_time_ms": 39565.133, "num_steps_trained": 32400, "grad_time_ms": 371.029, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1281.2742919921875, "policy_loss": -0.10503542423248291, "vf_explained_var": -0.700732946395874, "entropy": 15.384541511535645, "cur_lr": 4.999999873689376e-05, "total_loss": 1281.1903076171875, "kl": 0.013778585940599442}, "load_time_ms": 0.749, "num_steps_sampled": 32400, "update_time_ms": 2.55}, "training_iteration": 27, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.70730924606323, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 32400, "timesteps_total": 32400, "custom_metrics": {}, "iterations_since_restore": 27, "episodes_this_iter": 24, "episode_reward_min": -100.10329485311799, "date": "2025-09-04_16-32-55", "episode_reward_max": -57.801233031301635, "pid": 3651948, "timestamp": 1756996375, "episode_reward_mean": -97.03697085146841, "time_total_s": 1208.4105989933014, "episodes_total": 651, "episode_len_mean": 49.86}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 1248.9155259132385, "info": {"sample_time_ms": 39606.656, "num_steps_trained": 33600, "grad_time_ms": 370.364, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1321.213623046875, "policy_loss": -0.10575778782367706, "vf_explained_var": -0.8148228526115417, "entropy": 15.369461059570312, "cur_lr": 4.999999873689376e-05, "total_loss": 1321.1295166015625, "kl": 0.014238353818655014}, "load_time_ms": 0.72, "num_steps_sampled": 33600, "update_time_ms": 2.546}, "training_iteration": 28, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.504926919937134, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 33600, "timesteps_total": 33600, "custom_metrics": {}, "iterations_since_restore": 28, "episodes_this_iter": 24, "episode_reward_min": -100.10329485311799, "date": "2025-09-04_16-33-35", "episode_reward_max": -57.801233031301635, "pid": 3651948, "timestamp": 1756996415, "episode_reward_mean": -97.03483582868591, "time_total_s": 1248.9155259132385, "episodes_total": 675, "episode_len_mean": 49.86}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 1288.9718182086945, "info": {"sample_time_ms": 39647.71, "num_steps_trained": 34800, "grad_time_ms": 371.231, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1341.10400390625, "policy_loss": -0.10665473341941833, "vf_explained_var": -0.8043767213821411, "entropy": 15.401509284973145, "cur_lr": 4.999999873689376e-05, "total_loss": 1341.019775390625, "kl": 0.014626596122980118}, "load_time_ms": 0.72, "num_steps_sampled": 34800, "update_time_ms": 2.513}, "training_iteration": 29, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.05629229545593, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 34800, "timesteps_total": 34800, "custom_metrics": {}, "iterations_since_restore": 29, "episodes_this_iter": 24, "episode_reward_min": -99.95508627430446, "date": "2025-09-04_16-34-15", "episode_reward_max": -57.801233031301635, "pid": 3651948, "timestamp": 1756996455, "episode_reward_mean": -97.0966372787425, "time_total_s": 1288.9718182086945, "episodes_total": 699, "episode_len_mean": 49.86}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 1328.9700276851654, "info": {"sample_time_ms": 39662.755, "num_steps_trained": 36000, "grad_time_ms": 372.537, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1277.83251953125, "policy_loss": -0.10264497995376587, "vf_explained_var": -0.7749524712562561, "entropy": 15.299591064453125, "cur_lr": 4.999999873689376e-05, "total_loss": 1277.75146484375, "kl": 0.014280046336352825}, "load_time_ms": 0.715, "num_steps_sampled": 36000, "update_time_ms": 2.514}, "training_iteration": 30, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.99820947647095, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 36000, "timesteps_total": 36000, "custom_metrics": {}, "iterations_since_restore": 30, "episodes_this_iter": 25, "episode_reward_min": -99.99093155757775, "date": "2025-09-04_16-34-55", "episode_reward_max": 0.0015531449246815043, "pid": 3651948, "timestamp": 1756996495, "episode_reward_mean": -96.37514610728846, "time_total_s": 1328.9700276851654, "episodes_total": 724, "episode_len_mean": 49.56}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 1369.1333026885986, "info": {"sample_time_ms": 39665.146, "num_steps_trained": 37200, "grad_time_ms": 373.787, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1310.75341796875, "policy_loss": -0.10099545121192932, "vf_explained_var": -0.7382559180259705, "entropy": 15.32412052154541, "cur_lr": 4.999999873689376e-05, "total_loss": 1310.672119140625, "kl": 0.012976918369531631}, "load_time_ms": 0.725, "num_steps_sampled": 37200, "update_time_ms": 2.568}, "training_iteration": 31, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.16327500343323, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 37200, "timesteps_total": 37200, "custom_metrics": {}, "iterations_since_restore": 31, "episodes_this_iter": 25, "episode_reward_min": -99.99093155757775, "date": "2025-09-04_16-35-36", "episode_reward_max": 0.0015531449246815043, "pid": 3651948, "timestamp": 1756996536, "episode_reward_mean": -95.63129160278804, "time_total_s": 1369.1333026885986, "episodes_total": 749, "episode_len_mean": 49.21}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 1408.7570397853851, "info": {"sample_time_ms": 39608.775, "num_steps_trained": 38400, "grad_time_ms": 370.909, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1223.63525390625, "policy_loss": -0.12073878198862076, "vf_explained_var": -0.7618313431739807, "entropy": 15.325020790100098, "cur_lr": 4.999999873689376e-05, "total_loss": 1223.53466796875, "kl": 0.01335633173584938}, "load_time_ms": 0.731, "num_steps_sampled": 38400, "update_time_ms": 2.589}, "training_iteration": 32, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.6237370967865, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 38400, "timesteps_total": 38400, "custom_metrics": {}, "iterations_since_restore": 32, "episodes_this_iter": 24, "episode_reward_min": -99.99093155757775, "date": "2025-09-04_16-36-15", "episode_reward_max": 0.0015531449246815043, "pid": 3651948, "timestamp": 1756996575, "episode_reward_mean": -95.55989689348331, "time_total_s": 1408.7570397853851, "episodes_total": 773, "episode_len_mean": 49.21}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 1448.3962044715881, "info": {"sample_time_ms": 39589.868, "num_steps_trained": 39600, "grad_time_ms": 374.091, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1231.0386962890625, "policy_loss": -0.11493682861328125, "vf_explained_var": -0.7585346698760986, "entropy": 15.28339672088623, "cur_lr": 4.999999873689376e-05, "total_loss": 1230.9471435546875, "kl": 0.015361123718321323}, "load_time_ms": 0.733, "num_steps_sampled": 39600, "update_time_ms": 2.56}, "training_iteration": 33, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.639164686203, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 39600, "timesteps_total": 39600, "custom_metrics": {}, "iterations_since_restore": 33, "episodes_this_iter": 24, "episode_reward_min": -99.99093155757775, "date": "2025-09-04_16-36-55", "episode_reward_max": 0.0015531449246815043, "pid": 3651948, "timestamp": 1756996615, "episode_reward_mean": -95.615816928204, "time_total_s": 1448.3962044715881, "episodes_total": 797, "episode_len_mean": 49.21}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 1488.3905136585236, "info": {"sample_time_ms": 39528.843, "num_steps_trained": 40800, "grad_time_ms": 374.478, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1203.33447265625, "policy_loss": -0.10448554903268814, "vf_explained_var": -0.7520565390586853, "entropy": 15.311238288879395, "cur_lr": 4.999999873689376e-05, "total_loss": 1203.2515869140625, "kl": 0.014262043870985508}, "load_time_ms": 0.723, "num_steps_sampled": 40800, "update_time_ms": 2.637}, "training_iteration": 34, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.994309186935425, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 40800, "timesteps_total": 40800, "custom_metrics": {}, "iterations_since_restore": 34, "episodes_this_iter": 24, "episode_reward_min": -99.73257844882728, "date": "2025-09-04_16-37-35", "episode_reward_max": -16.290060169030422, "pid": 3651948, "timestamp": 1756996655, "episode_reward_mean": -96.62528324751145, "time_total_s": 1488.3905136585236, "episodes_total": 821, "episode_len_mean": 49.65}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 1528.3656723499298, "info": {"sample_time_ms": 39545.009, "num_steps_trained": 42000, "grad_time_ms": 376.545, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1227.427978515625, "policy_loss": -0.11075553297996521, "vf_explained_var": -0.801076352596283, "entropy": 15.274660110473633, "cur_lr": 4.999999873689376e-05, "total_loss": 1227.34033203125, "kl": 0.015269107185304165}, "load_time_ms": 0.725, "num_steps_sampled": 42000, "update_time_ms": 2.623}, "training_iteration": 35, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.97515869140625, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 42000, "timesteps_total": 42000, "custom_metrics": {}, "iterations_since_restore": 35, "episodes_this_iter": 24, "episode_reward_min": -99.73257844882728, "date": "2025-09-04_16-38-15", "episode_reward_max": -77.82174753169423, "pid": 3651948, "timestamp": 1756996695, "episode_reward_mean": -97.0927170250508, "time_total_s": 1528.3656723499298, "episodes_total": 845, "episode_len_mean": 49.95}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 1568.491044998169, "info": {"sample_time_ms": 39596.393, "num_steps_trained": 43200, "grad_time_ms": 374.488, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1230.39404296875, "policy_loss": -0.1102805882692337, "vf_explained_var": -0.7730542421340942, "entropy": 15.220480918884277, "cur_lr": 4.999999873689376e-05, "total_loss": 1230.304931640625, "kl": 0.013823870569467545}, "load_time_ms": 0.728, "num_steps_sampled": 43200, "update_time_ms": 2.645}, "training_iteration": 36, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.125372648239136, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 43200, "timesteps_total": 43200, "custom_metrics": {}, "iterations_since_restore": 36, "episodes_this_iter": 24, "episode_reward_min": -99.99007305617893, "date": "2025-09-04_16-38-55", "episode_reward_max": -77.82174753169423, "pid": 3651948, "timestamp": 1756996735, "episode_reward_mean": -97.28325003911186, "time_total_s": 1568.491044998169, "episodes_total": 869, "episode_len_mean": 49.95}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 1608.1779806613922, "info": {"sample_time_ms": 39594.501, "num_steps_trained": 44400, "grad_time_ms": 374.389, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1233.9117431640625, "policy_loss": -0.1051551029086113, "vf_explained_var": -0.6987488865852356, "entropy": 15.250106811523438, "cur_lr": 4.999999873689376e-05, "total_loss": 1233.82861328125, "kl": 0.014496508985757828}, "load_time_ms": 0.694, "num_steps_sampled": 44400, "update_time_ms": 2.627}, "training_iteration": 37, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.68693566322327, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 44400, "timesteps_total": 44400, "custom_metrics": {}, "iterations_since_restore": 37, "episodes_this_iter": 24, "episode_reward_min": -99.99007305617893, "date": "2025-09-04_16-39-35", "episode_reward_max": -77.82174753169423, "pid": 3651948, "timestamp": 1756996775, "episode_reward_mean": -97.21012824430167, "time_total_s": 1608.1779806613922, "episodes_total": 893, "episode_len_mean": 49.95}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 1648.4379494190216, "info": {"sample_time_ms": 39570.653, "num_steps_trained": 45600, "grad_time_ms": 373.73, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1277.79052734375, "policy_loss": -0.10825362056493759, "vf_explained_var": -0.7801445126533508, "entropy": 15.28171157836914, "cur_lr": 4.999999873689376e-05, "total_loss": 1277.70361328125, "kl": 0.013901184312999249}, "load_time_ms": 0.694, "num_steps_sampled": 45600, "update_time_ms": 2.637}, "training_iteration": 38, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.259968757629395, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 45600, "timesteps_total": 45600, "custom_metrics": {}, "iterations_since_restore": 38, "episodes_this_iter": 24, "episode_reward_min": -99.99007305617893, "date": "2025-09-04_16-40-15", "episode_reward_max": -77.82174753169423, "pid": 3651948, "timestamp": 1756996815, "episode_reward_mean": -97.04979273483048, "time_total_s": 1648.4379494190216, "episodes_total": 917, "episode_len_mean": 49.95}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 1688.43723654747, "info": {"sample_time_ms": 39563.71, "num_steps_trained": 46800, "grad_time_ms": 374.899, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1167.91552734375, "policy_loss": -0.10696208477020264, "vf_explained_var": -0.7553014755249023, "entropy": 15.204646110534668, "cur_lr": 4.999999873689376e-05, "total_loss": 1167.8319091796875, "kl": 0.015371869318187237}, "load_time_ms": 0.703, "num_steps_sampled": 46800, "update_time_ms": 2.655}, "training_iteration": 39, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.999287128448486, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 46800, "timesteps_total": 46800, "custom_metrics": {}, "iterations_since_restore": 39, "episodes_this_iter": 25, "episode_reward_min": -99.99007305617893, "date": "2025-09-04_16-40-55", "episode_reward_max": -15.99934133821527, "pid": 3651948, "timestamp": 1756996855, "episode_reward_mean": -96.36363020491022, "time_total_s": 1688.43723654747, "episodes_total": 942, "episode_len_mean": 49.59}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 1729.0509288311005, "info": {"sample_time_ms": 39625.919, "num_steps_trained": 48000, "grad_time_ms": 374.175, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1148.047119140625, "policy_loss": -0.10330415517091751, "vf_explained_var": -0.7272942662239075, "entropy": 15.175899505615234, "cur_lr": 4.999999873689376e-05, "total_loss": 1147.966552734375, "kl": 0.015090687200427055}, "load_time_ms": 0.721, "num_steps_sampled": 48000, "update_time_ms": 2.669}, "training_iteration": 40, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.61369228363037, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 48000, "timesteps_total": 48000, "custom_metrics": {}, "iterations_since_restore": 40, "episodes_this_iter": 24, "episode_reward_min": -99.95344412932664, "date": "2025-09-04_16-41-36", "episode_reward_max": -15.99934133821527, "pid": 3651948, "timestamp": 1756996896, "episode_reward_mean": -96.24264843934478, "time_total_s": 1729.0509288311005, "episodes_total": 966, "episode_len_mean": 49.59}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 1769.8827843666077, "info": {"sample_time_ms": 39694.499, "num_steps_trained": 49200, "grad_time_ms": 372.5, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1252.537109375, "policy_loss": -0.11457589268684387, "vf_explained_var": -0.7772528529167175, "entropy": 15.200519561767578, "cur_lr": 4.999999873689376e-05, "total_loss": 1252.4432373046875, "kl": 0.013611800968647003}, "load_time_ms": 0.725, "num_steps_sampled": 49200, "update_time_ms": 2.658}, "training_iteration": 41, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.8318555355072, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 49200, "timesteps_total": 49200, "custom_metrics": {}, "iterations_since_restore": 41, "episodes_this_iter": 24, "episode_reward_min": -99.95344412932664, "date": "2025-09-04_16-42-17", "episode_reward_max": -15.99934133821527, "pid": 3651948, "timestamp": 1756996937, "episode_reward_mean": -96.22698868572897, "time_total_s": 1769.8827843666077, "episodes_total": 990, "episode_len_mean": 49.59}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 1809.5630688667297, "info": {"sample_time_ms": 39697.47, "num_steps_trained": 50400, "grad_time_ms": 375.234, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1227.67529296875, "policy_loss": -0.10887836664915085, "vf_explained_var": -0.778679609298706, "entropy": 15.25713062286377, "cur_lr": 4.999999873689376e-05, "total_loss": 1227.587158203125, "kl": 0.013566892594099045}, "load_time_ms": 0.718, "num_steps_sampled": 50400, "update_time_ms": 2.617}, "training_iteration": 42, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.68028450012207, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 50400, "timesteps_total": 50400, "custom_metrics": {}, "iterations_since_restore": 42, "episodes_this_iter": 24, "episode_reward_min": -99.95344412932664, "date": "2025-09-04_16-42-56", "episode_reward_max": -15.99934133821527, "pid": 3651948, "timestamp": 1756996976, "episode_reward_mean": -96.22189939411399, "time_total_s": 1809.5630688667297, "episodes_total": 1014, "episode_len_mean": 49.59}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 1849.4926145076752, "info": {"sample_time_ms": 39729.576, "num_steps_trained": 51600, "grad_time_ms": 372.205, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1097.594970703125, "policy_loss": -0.11680027842521667, "vf_explained_var": -0.6031178832054138, "entropy": 15.132685661315918, "cur_lr": 4.999999873689376e-05, "total_loss": 1097.4976806640625, "kl": 0.012929944321513176}, "load_time_ms": 0.72, "num_steps_sampled": 51600, "update_time_ms": 2.628}, "training_iteration": 43, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.929545640945435, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 51600, "timesteps_total": 51600, "custom_metrics": {}, "iterations_since_restore": 43, "episodes_this_iter": 24, "episode_reward_min": -99.84089460076768, "date": "2025-09-04_16-43-36", "episode_reward_max": -83.19525614553856, "pid": 3651948, "timestamp": 1756997016, "episode_reward_mean": -97.00053722716505, "time_total_s": 1849.4926145076752, "episodes_total": 1038, "episode_len_mean": 49.98}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 1889.2982964515686, "info": {"sample_time_ms": 39710.673, "num_steps_trained": 52800, "grad_time_ms": 372.29, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1327.10302734375, "policy_loss": -0.11665192991495132, "vf_explained_var": -0.7370307445526123, "entropy": 15.16311264038086, "cur_lr": 4.999999873689376e-05, "total_loss": 1327.0091552734375, "kl": 0.014845062047243118}, "load_time_ms": 0.718, "num_steps_sampled": 52800, "update_time_ms": 2.547}, "training_iteration": 44, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.80568194389343, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 52800, "timesteps_total": 52800, "custom_metrics": {}, "iterations_since_restore": 44, "episodes_this_iter": 24, "episode_reward_min": -99.84089460076768, "date": "2025-09-04_16-44-16", "episode_reward_max": -47.30665988731469, "pid": 3651948, "timestamp": 1756997056, "episode_reward_mean": -96.36052394042983, "time_total_s": 1889.2982964515686, "episodes_total": 1062, "episode_len_mean": 49.79}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 1929.0171658992767, "info": {"sample_time_ms": 39686.403, "num_steps_trained": 54000, "grad_time_ms": 370.849, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1300.06591796875, "policy_loss": -0.11423023790121078, "vf_explained_var": -0.7665535807609558, "entropy": 15.056652069091797, "cur_lr": 4.999999873689376e-05, "total_loss": 1299.973388671875, "kl": 0.014357775449752808}, "load_time_ms": 0.715, "num_steps_sampled": 54000, "update_time_ms": 2.623}, "training_iteration": 45, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.71886944770813, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 54000, "timesteps_total": 54000, "custom_metrics": {}, "iterations_since_restore": 45, "episodes_this_iter": 24, "episode_reward_min": -99.84089460076768, "date": "2025-09-04_16-44-56", "episode_reward_max": -47.30665988731469, "pid": 3651948, "timestamp": 1756997096, "episode_reward_mean": -96.35048571213896, "time_total_s": 1929.0171658992767, "episodes_total": 1086, "episode_len_mean": 49.79}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 1968.8651938438416, "info": {"sample_time_ms": 39658.494, "num_steps_trained": 55200, "grad_time_ms": 370.962, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1358.3763427734375, "policy_loss": -0.1065993681550026, "vf_explained_var": -0.79640793800354, "entropy": 15.09638500213623, "cur_lr": 4.999999873689376e-05, "total_loss": 1358.2899169921875, "kl": 0.013298786245286465}, "load_time_ms": 0.713, "num_steps_sampled": 55200, "update_time_ms": 2.665}, "training_iteration": 46, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.84802794456482, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 55200, "timesteps_total": 55200, "custom_metrics": {}, "iterations_since_restore": 46, "episodes_this_iter": 24, "episode_reward_min": -99.84089460076768, "date": "2025-09-04_16-45-36", "episode_reward_max": -47.30665988731469, "pid": 3651948, "timestamp": 1756997136, "episode_reward_mean": -96.46923531968903, "time_total_s": 1968.8651938438416, "episodes_total": 1110, "episode_len_mean": 49.79}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 2010.0142283439636, "info": {"sample_time_ms": 39805.123, "num_steps_trained": 56400, "grad_time_ms": 370.536, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1360.581298828125, "policy_loss": -0.11323577910661697, "vf_explained_var": -0.7683766484260559, "entropy": 15.034567832946777, "cur_lr": 4.999999873689376e-05, "total_loss": 1360.4910888671875, "kl": 0.015101809985935688}, "load_time_ms": 0.717, "num_steps_sampled": 56400, "update_time_ms": 2.664}, "training_iteration": 47, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 41.14903450012207, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 56400, "timesteps_total": 56400, "custom_metrics": {}, "iterations_since_restore": 47, "episodes_this_iter": 24, "episode_reward_min": -99.70491179654027, "date": "2025-09-04_16-46-17", "episode_reward_max": -47.30665988731469, "pid": 3651948, "timestamp": 1756997177, "episode_reward_mean": -96.50030034668707, "time_total_s": 2010.0142283439636, "episodes_total": 1134, "episode_len_mean": 49.79}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 2050.1419506073, "info": {"sample_time_ms": 39793.815, "num_steps_trained": 57600, "grad_time_ms": 368.623, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1230.81640625, "policy_loss": -0.11603689193725586, "vf_explained_var": -0.7617323994636536, "entropy": 14.98969554901123, "cur_lr": 4.999999873689376e-05, "total_loss": 1230.7230224609375, "kl": 0.014867722988128662}, "load_time_ms": 0.721, "num_steps_sampled": 57600, "update_time_ms": 2.658}, "training_iteration": 48, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.12772226333618, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 57600, "timesteps_total": 57600, "custom_metrics": {}, "iterations_since_restore": 48, "episodes_this_iter": 25, "episode_reward_min": -99.70491179654027, "date": "2025-09-04_16-46-57", "episode_reward_max": -50.47800847607699, "pid": 3651948, "timestamp": 1756997217, "episode_reward_mean": -96.78667045656734, "time_total_s": 2050.1419506073, "episodes_total": 1159, "episode_len_mean": 49.83}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 2090.207808494568, "info": {"sample_time_ms": 39800.839, "num_steps_trained": 58800, "grad_time_ms": 368.33, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1210.0145263671875, "policy_loss": -0.1278069019317627, "vf_explained_var": -0.7858371138572693, "entropy": 14.963143348693848, "cur_lr": 4.999999873689376e-05, "total_loss": 1209.9093017578125, "kl": 0.014916043728590012}, "load_time_ms": 0.716, "num_steps_sampled": 58800, "update_time_ms": 2.632}, "training_iteration": 49, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.065857887268066, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 58800, "timesteps_total": 58800, "custom_metrics": {}, "iterations_since_restore": 49, "episodes_this_iter": 24, "episode_reward_min": -99.69965493226601, "date": "2025-09-04_16-47-37", "episode_reward_max": -36.92857428593311, "pid": 3651948, "timestamp": 1756997257, "episode_reward_mean": -96.00154953185834, "time_total_s": 2090.207808494568, "episodes_total": 1183, "episode_len_mean": 49.58}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 2130.0400941371918, "info": {"sample_time_ms": 39722.489, "num_steps_trained": 60000, "grad_time_ms": 368.589, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1181.7391357421875, "policy_loss": -0.11024336516857147, "vf_explained_var": -0.7595869302749634, "entropy": 14.997981071472168, "cur_lr": 4.999999873689376e-05, "total_loss": 1181.6502685546875, "kl": 0.014109021984040737}, "load_time_ms": 0.704, "num_steps_sampled": 60000, "update_time_ms": 2.607}, "training_iteration": 50, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.8322856426239, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 60000, "timesteps_total": 60000, "custom_metrics": {}, "iterations_since_restore": 50, "episodes_this_iter": 26, "episode_reward_min": -99.69965493226601, "date": "2025-09-04_16-48-17", "episode_reward_max": 2.000894818521134, "pid": 3651948, "timestamp": 1756997297, "episode_reward_mean": -94.21600584758427, "time_total_s": 2130.0400941371918, "episodes_total": 1209, "episode_len_mean": 48.88}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 2169.8263907432556, "info": {"sample_time_ms": 39616.721, "num_steps_trained": 61200, "grad_time_ms": 369.792, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1206.790771484375, "policy_loss": -0.10038409382104874, "vf_explained_var": -0.7725622653961182, "entropy": 14.993680953979492, "cur_lr": 4.999999873689376e-05, "total_loss": 1206.7130126953125, "kl": 0.014860378578305244}, "load_time_ms": 0.699, "num_steps_sampled": 61200, "update_time_ms": 2.575}, "training_iteration": 51, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.78629660606384, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 61200, "timesteps_total": 61200, "custom_metrics": {}, "iterations_since_restore": 51, "episodes_this_iter": 24, "episode_reward_min": -99.69965493226601, "date": "2025-09-04_16-48-57", "episode_reward_max": 2.000894818521134, "pid": 3651948, "timestamp": 1756997337, "episode_reward_mean": -94.16899019027835, "time_total_s": 2169.8263907432556, "episodes_total": 1233, "episode_len_mean": 48.88}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 2210.4999437332153, "info": {"sample_time_ms": 39718.94, "num_steps_trained": 62400, "grad_time_ms": 366.862, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1131.96240234375, "policy_loss": -0.11758121848106384, "vf_explained_var": -0.6979755163192749, "entropy": 14.95267105102539, "cur_lr": 4.999999873689376e-05, "total_loss": 1131.8658447265625, "kl": 0.013862605206668377}, "load_time_ms": 0.691, "num_steps_sampled": 62400, "update_time_ms": 2.612}, "training_iteration": 52, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.67355298995972, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 62400, "timesteps_total": 62400, "custom_metrics": {}, "iterations_since_restore": 52, "episodes_this_iter": 26, "episode_reward_min": -99.69965493226601, "date": "2025-09-04_16-49-37", "episode_reward_max": 8.000000567682516, "pid": 3651948, "timestamp": 1756997377, "episode_reward_mean": -92.0585035779024, "time_total_s": 2210.4999437332153, "episodes_total": 1259, "episode_len_mean": 47.93}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 2250.7435400485992, "info": {"sample_time_ms": 39748.417, "num_steps_trained": 63600, "grad_time_ms": 368.77, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1224.529541015625, "policy_loss": -0.09991131722927094, "vf_explained_var": -0.7694526314735413, "entropy": 14.912704467773438, "cur_lr": 4.999999873689376e-05, "total_loss": 1224.4503173828125, "kl": 0.013661215081810951}, "load_time_ms": 0.691, "num_steps_sampled": 63600, "update_time_ms": 2.613}, "training_iteration": 53, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.24359631538391, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 63600, "timesteps_total": 63600, "custom_metrics": {}, "iterations_since_restore": 53, "episodes_this_iter": 25, "episode_reward_min": -99.68827816877031, "date": "2025-09-04_16-50-18", "episode_reward_max": 8.000000567682516, "pid": 3651948, "timestamp": 1756997418, "episode_reward_mean": -91.20529984729008, "time_total_s": 2250.7435400485992, "episodes_total": 1284, "episode_len_mean": 47.52}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 2290.6784195899963, "info": {"sample_time_ms": 39758.858, "num_steps_trained": 64800, "grad_time_ms": 371.236, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1310.67138671875, "policy_loss": -0.10968722403049469, "vf_explained_var": -0.8816094398498535, "entropy": 15.012337684631348, "cur_lr": 4.999999873689376e-05, "total_loss": 1310.582763671875, "kl": 0.013880123384296894}, "load_time_ms": 0.688, "num_steps_sampled": 64800, "update_time_ms": 2.624}, "training_iteration": 54, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.934879541397095, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 64800, "timesteps_total": 64800, "custom_metrics": {}, "iterations_since_restore": 54, "episodes_this_iter": 24, "episode_reward_min": -99.68827816877031, "date": "2025-09-04_16-50-58", "episode_reward_max": 8.000000567682516, "pid": 3651948, "timestamp": 1756997458, "episode_reward_mean": -92.94233478779394, "time_total_s": 2290.6784195899963, "episodes_total": 1308, "episode_len_mean": 48.22}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 2330.734453201294, "info": {"sample_time_ms": 39790.737, "num_steps_trained": 66000, "grad_time_ms": 373.097, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1422.956787109375, "policy_loss": -0.12223473936319351, "vf_explained_var": -0.8387157320976257, "entropy": 15.144838333129883, "cur_lr": 4.999999873689376e-05, "total_loss": 1422.8563232421875, "kl": 0.01441657543182373}, "load_time_ms": 0.702, "num_steps_sampled": 66000, "update_time_ms": 2.597}, "training_iteration": 55, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.05603361129761, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 66000, "timesteps_total": 66000, "custom_metrics": {}, "iterations_since_restore": 55, "episodes_this_iter": 25, "episode_reward_min": -99.68827816877031, "date": "2025-09-04_16-51-38", "episode_reward_max": 8.000000567682516, "pid": 3651948, "timestamp": 1756997498, "episode_reward_mean": -91.4661865953845, "time_total_s": 2330.734453201294, "episodes_total": 1333, "episode_len_mean": 47.67}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 2370.54008436203, "info": {"sample_time_ms": 39784.586, "num_steps_trained": 67200, "grad_time_ms": 375.042, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1342.798828125, "policy_loss": -0.11646595597267151, "vf_explained_var": -0.7877098917961121, "entropy": 15.047779083251953, "cur_lr": 4.999999873689376e-05, "total_loss": 1342.707275390625, "kl": 0.016279883682727814}, "load_time_ms": 0.714, "num_steps_sampled": 67200, "update_time_ms": 2.59}, "training_iteration": 56, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.805631160736084, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 67200, "timesteps_total": 67200, "custom_metrics": {}, "iterations_since_restore": 56, "episodes_this_iter": 24, "episode_reward_min": -99.22056810823626, "date": "2025-09-04_16-52-17", "episode_reward_max": 1.1405470155882025, "pid": 3651948, "timestamp": 1756997537, "episode_reward_mean": -93.87697515324817, "time_total_s": 2370.54008436203, "episodes_total": 1357, "episode_len_mean": 48.79}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 2410.359657764435, "info": {"sample_time_ms": 39652.665, "num_steps_trained": 68400, "grad_time_ms": 373.994, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1294.26953125, "policy_loss": -0.1283871829509735, "vf_explained_var": -0.7179339528083801, "entropy": 14.984747886657715, "cur_lr": 4.999999873689376e-05, "total_loss": 1294.1630859375, "kl": 0.014505099505186081}, "load_time_ms": 0.709, "num_steps_sampled": 68400, "update_time_ms": 2.615}, "training_iteration": 57, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.819573402404785, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 68400, "timesteps_total": 68400, "custom_metrics": {}, "iterations_since_restore": 57, "episodes_this_iter": 24, "episode_reward_min": -99.30733801768991, "date": "2025-09-04_16-52-57", "episode_reward_max": 1.1405470155882025, "pid": 3651948, "timestamp": 1756997577, "episode_reward_mean": -94.32058205387851, "time_total_s": 2410.359657764435, "episodes_total": 1381, "episode_len_mean": 49.01}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 2451.774926185608, "info": {"sample_time_ms": 39781.232, "num_steps_trained": 69600, "grad_time_ms": 374.189, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1268.938720703125, "policy_loss": -0.11768833547830582, "vf_explained_var": -0.7330797910690308, "entropy": 14.87173080444336, "cur_lr": 4.999999873689376e-05, "total_loss": 1268.8406982421875, "kl": 0.01305652316659689}, "load_time_ms": 0.7, "num_steps_sampled": 69600, "update_time_ms": 2.616}, "training_iteration": 58, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 41.415268421173096, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 69600, "timesteps_total": 69600, "custom_metrics": {}, "iterations_since_restore": 58, "episodes_this_iter": 24, "episode_reward_min": -99.30733801768991, "date": "2025-09-04_16-53-39", "episode_reward_max": -3.6157548869232627, "pid": 3651948, "timestamp": 1756997619, "episode_reward_mean": -95.15196150291067, "time_total_s": 2451.774926185608, "episodes_total": 1405, "episode_len_mean": 49.44}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 2492.544373989105, "info": {"sample_time_ms": 39851.185, "num_steps_trained": 70800, "grad_time_ms": 374.531, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1244.011474609375, "policy_loss": -0.11157584190368652, "vf_explained_var": -0.7300561666488647, "entropy": 14.927780151367188, "cur_lr": 4.999999873689376e-05, "total_loss": 1243.9197998046875, "kl": 0.013102485798299313}, "load_time_ms": 0.699, "num_steps_sampled": 70800, "update_time_ms": 2.636}, "training_iteration": 59, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.769447803497314, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 70800, "timesteps_total": 70800, "custom_metrics": {}, "iterations_since_restore": 59, "episodes_this_iter": 25, "episode_reward_min": -99.30733801768991, "date": "2025-09-04_16-54-19", "episode_reward_max": -3.6157548869232627, "pid": 3651948, "timestamp": 1756997659, "episode_reward_mean": -94.71678434113755, "time_total_s": 2492.544373989105, "episodes_total": 1430, "episode_len_mean": 49.24}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 2532.3875205516815, "info": {"sample_time_ms": 39852.211, "num_steps_trained": 72000, "grad_time_ms": 374.575, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1167.0535888671875, "policy_loss": -0.11288302391767502, "vf_explained_var": -0.7880843281745911, "entropy": 14.89885139465332, "cur_lr": 4.999999873689376e-05, "total_loss": 1166.9609375, "kl": 0.013332750648260117}, "load_time_ms": 0.706, "num_steps_sampled": 72000, "update_time_ms": 2.64}, "training_iteration": 60, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.843146562576294, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 72000, "timesteps_total": 72000, "custom_metrics": {}, "iterations_since_restore": 60, "episodes_this_iter": 25, "episode_reward_min": -99.60113338733126, "date": "2025-09-04_16-54-59", "episode_reward_max": -43.26172837301939, "pid": 3651948, "timestamp": 1756997699, "episode_reward_mean": -95.24715711171521, "time_total_s": 2532.3875205516815, "episodes_total": 1455, "episode_len_mean": 49.54}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 2572.3899228572845, "info": {"sample_time_ms": 39874.653, "num_steps_trained": 73200, "grad_time_ms": 373.813, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1214.6409912109375, "policy_loss": -0.11584869027137756, "vf_explained_var": -0.7653178572654724, "entropy": 14.912324905395508, "cur_lr": 4.999999873689376e-05, "total_loss": 1214.54638671875, "kl": 0.014048927463591099}, "load_time_ms": 0.691, "num_steps_sampled": 73200, "update_time_ms": 2.639}, "training_iteration": 61, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.00240230560303, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 73200, "timesteps_total": 73200, "custom_metrics": {}, "iterations_since_restore": 61, "episodes_this_iter": 24, "episode_reward_min": -99.60113338733126, "date": "2025-09-04_16-55-39", "episode_reward_max": -39.9027328754405, "pid": 3651948, "timestamp": 1756997739, "episode_reward_mean": -94.50576187376137, "time_total_s": 2572.3899228572845, "episodes_total": 1479, "episode_len_mean": 49.3}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 2612.1271228790283, "info": {"sample_time_ms": 39778.975, "num_steps_trained": 74400, "grad_time_ms": 375.838, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1259.583984375, "policy_loss": -0.11178527772426605, "vf_explained_var": -0.7795595526695251, "entropy": 14.82375431060791, "cur_lr": 4.999999873689376e-05, "total_loss": 1259.4942626953125, "kl": 0.014546235091984272}, "load_time_ms": 0.695, "num_steps_sampled": 74400, "update_time_ms": 2.646}, "training_iteration": 62, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.737200021743774, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 74400, "timesteps_total": 74400, "custom_metrics": {}, "iterations_since_restore": 62, "episodes_this_iter": 25, "episode_reward_min": -99.97254170911407, "date": "2025-09-04_16-56-19", "episode_reward_max": 2.1591405978752833, "pid": 3651948, "timestamp": 1756997779, "episode_reward_mean": -93.35622147695085, "time_total_s": 2612.1271228790283, "episodes_total": 1504, "episode_len_mean": 48.81}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 2652.1977066993713, "info": {"sample_time_ms": 39763.604, "num_steps_trained": 75600, "grad_time_ms": 373.908, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1356.56787109375, "policy_loss": -0.10862504690885544, "vf_explained_var": -0.7592952847480774, "entropy": 14.951154708862305, "cur_lr": 4.999999873689376e-05, "total_loss": 1356.4798583984375, "kl": 0.013558438047766685}, "load_time_ms": 0.694, "num_steps_sampled": 75600, "update_time_ms": 2.647}, "training_iteration": 63, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.07058382034302, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 75600, "timesteps_total": 75600, "custom_metrics": {}, "iterations_since_restore": 63, "episodes_this_iter": 25, "episode_reward_min": -99.97254170911407, "date": "2025-09-04_16-56-59", "episode_reward_max": 6.000001326755738, "pid": 3651948, "timestamp": 1756997819, "episode_reward_mean": -92.5705083925217, "time_total_s": 2652.1977066993713, "episodes_total": 1529, "episode_len_mean": 48.41}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 2692.412809610367, "info": {"sample_time_ms": 39792.826, "num_steps_trained": 76800, "grad_time_ms": 372.736, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1189.119873046875, "policy_loss": -0.12759803235530853, "vf_explained_var": -0.7264623045921326, "entropy": 14.669437408447266, "cur_lr": 4.999999873689376e-05, "total_loss": 1189.014404296875, "kl": 0.01455807313323021}, "load_time_ms": 0.697, "num_steps_sampled": 76800, "update_time_ms": 2.635}, "training_iteration": 64, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.21510291099548, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 76800, "timesteps_total": 76800, "custom_metrics": {}, "iterations_since_restore": 64, "episodes_this_iter": 24, "episode_reward_min": -99.97254170911407, "date": "2025-09-04_16-57-40", "episode_reward_max": 6.000001326755738, "pid": 3651948, "timestamp": 1756997860, "episode_reward_mean": -92.60180417143867, "time_total_s": 2692.412809610367, "episodes_total": 1553, "episode_len_mean": 48.39}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 2732.490079641342, "info": {"sample_time_ms": 39797.621, "num_steps_trained": 78000, "grad_time_ms": 370.136, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1244.51416015625, "policy_loss": -0.11783421039581299, "vf_explained_var": -0.7883577942848206, "entropy": 14.744547843933105, "cur_lr": 4.999999873689376e-05, "total_loss": 1244.4168701171875, "kl": 0.013561917468905449}, "load_time_ms": 0.677, "num_steps_sampled": 78000, "update_time_ms": 2.622}, "training_iteration": 65, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.07727003097534, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 78000, "timesteps_total": 78000, "custom_metrics": {}, "iterations_since_restore": 65, "episodes_this_iter": 25, "episode_reward_min": -99.97254170911407, "date": "2025-09-04_16-58-20", "episode_reward_max": 6.000001326755738, "pid": 3651948, "timestamp": 1756997900, "episode_reward_mean": -92.77911973728547, "time_total_s": 2732.490079641342, "episodes_total": 1578, "episode_len_mean": 48.36}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 2772.6015956401825, "info": {"sample_time_ms": 39828.28, "num_steps_trained": 79200, "grad_time_ms": 370.099, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1261.6090087890625, "policy_loss": -0.11495360732078552, "vf_explained_var": -0.7529252171516418, "entropy": 14.982555389404297, "cur_lr": 4.999999873689376e-05, "total_loss": 1261.513427734375, "kl": 0.012708180584013462}, "load_time_ms": 0.676, "num_steps_sampled": 79200, "update_time_ms": 2.561}, "training_iteration": 66, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.11151599884033, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 79200, "timesteps_total": 79200, "custom_metrics": {}, "iterations_since_restore": 66, "episodes_this_iter": 25, "episode_reward_min": -99.72677078360388, "date": "2025-09-04_16-59-00", "episode_reward_max": 6.000001326755738, "pid": 3651948, "timestamp": 1756997940, "episode_reward_mean": -93.05071050827317, "time_total_s": 2772.6015956401825, "episodes_total": 1603, "episode_len_mean": 48.6}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 2812.351597547531, "info": {"sample_time_ms": 39820.574, "num_steps_trained": 80400, "grad_time_ms": 370.846, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1198.54150390625, "policy_loss": -0.12078271806240082, "vf_explained_var": -0.8323256969451904, "entropy": 14.662151336669922, "cur_lr": 4.999999873689376e-05, "total_loss": 1198.443603515625, "kl": 0.014926041476428509}, "load_time_ms": 0.682, "num_steps_sampled": 80400, "update_time_ms": 2.553}, "training_iteration": 67, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.75000190734863, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 80400, "timesteps_total": 80400, "custom_metrics": {}, "iterations_since_restore": 67, "episodes_this_iter": 25, "episode_reward_min": -99.72677078360388, "date": "2025-09-04_16-59-39", "episode_reward_max": -33.96508927336994, "pid": 3651948, "timestamp": 1756997979, "episode_reward_mean": -94.36946534877416, "time_total_s": 2812.351597547531, "episodes_total": 1628, "episode_len_mean": 49.14}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 2852.031061410904, "info": {"sample_time_ms": 39645.242, "num_steps_trained": 81600, "grad_time_ms": 372.573, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1221.2003173828125, "policy_loss": -0.10780903697013855, "vf_explained_var": -0.8038766384124756, "entropy": 14.78492546081543, "cur_lr": 4.999999873689376e-05, "total_loss": 1221.1131591796875, "kl": 0.013653003610670567}, "load_time_ms": 0.686, "num_steps_sampled": 81600, "update_time_ms": 2.537}, "training_iteration": 68, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.6794638633728, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 81600, "timesteps_total": 81600, "custom_metrics": {}, "iterations_since_restore": 68, "episodes_this_iter": 24, "episode_reward_min": -99.58787226122642, "date": "2025-09-04_17-00-19", "episode_reward_max": -25.2159638771289, "pid": 3651948, "timestamp": 1756998019, "episode_reward_mean": -94.04441640538226, "time_total_s": 2852.031061410904, "episodes_total": 1652, "episode_len_mean": 48.97}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 2891.8031606674194, "info": {"sample_time_ms": 39547.375, "num_steps_trained": 82800, "grad_time_ms": 370.732, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1223.3162841796875, "policy_loss": -0.11841960996389389, "vf_explained_var": -0.8032306432723999, "entropy": 14.663142204284668, "cur_lr": 4.999999873689376e-05, "total_loss": 1223.2208251953125, "kl": 0.015099359676241875}, "load_time_ms": 0.685, "num_steps_sampled": 82800, "update_time_ms": 2.526}, "training_iteration": 69, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.7720992565155, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 82800, "timesteps_total": 82800, "custom_metrics": {}, "iterations_since_restore": 69, "episodes_this_iter": 25, "episode_reward_min": -99.99162556002155, "date": "2025-09-04_17-00-59", "episode_reward_max": -0.7702540579181019, "pid": 3651948, "timestamp": 1756998059, "episode_reward_mean": -93.08991260626364, "time_total_s": 2891.8031606674194, "episodes_total": 1677, "episode_len_mean": 48.65}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 2931.6492550373077, "info": {"sample_time_ms": 39548.229, "num_steps_trained": 84000, "grad_time_ms": 370.219, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1172.17431640625, "policy_loss": -0.1396070122718811, "vf_explained_var": -0.7034938335418701, "entropy": 14.607905387878418, "cur_lr": 4.999999873689376e-05, "total_loss": 1172.0589599609375, "kl": 0.015890225768089294}, "load_time_ms": 0.671, "num_steps_sampled": 84000, "update_time_ms": 2.544}, "training_iteration": 70, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.846094369888306, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 84000, "timesteps_total": 84000, "custom_metrics": {}, "iterations_since_restore": 70, "episodes_this_iter": 24, "episode_reward_min": -99.99162556002155, "date": "2025-09-04_17-01-39", "episode_reward_max": -0.7702540579181019, "pid": 3651948, "timestamp": 1756998099, "episode_reward_mean": -93.36784562070854, "time_total_s": 2931.6492550373077, "episodes_total": 1701, "episode_len_mean": 48.68}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 2971.3676204681396, "info": {"sample_time_ms": 39519.912, "num_steps_trained": 85200, "grad_time_ms": 370.109, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1292.23876953125, "policy_loss": -0.1328306645154953, "vf_explained_var": -0.7532870769500732, "entropy": 14.943361282348633, "cur_lr": 4.999999873689376e-05, "total_loss": 1292.128173828125, "kl": 0.014537609182298183}, "load_time_ms": 0.674, "num_steps_sampled": 85200, "update_time_ms": 2.531}, "training_iteration": 71, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.71836543083191, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 85200, "timesteps_total": 85200, "custom_metrics": {}, "iterations_since_restore": 71, "episodes_this_iter": 24, "episode_reward_min": -99.99162556002155, "date": "2025-09-04_17-02-19", "episode_reward_max": -0.7702540579181019, "pid": 3651948, "timestamp": 1756998139, "episode_reward_mean": -92.87745475202908, "time_total_s": 2971.3676204681396, "episodes_total": 1725, "episode_len_mean": 48.64}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 3011.002952814102, "info": {"sample_time_ms": 39510.118, "num_steps_trained": 86400, "grad_time_ms": 369.717, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1240.18310546875, "policy_loss": -0.11488083750009537, "vf_explained_var": -0.6741650104522705, "entropy": 14.941850662231445, "cur_lr": 4.999999873689376e-05, "total_loss": 1240.0899658203125, "kl": 0.014264964498579502}, "load_time_ms": 0.671, "num_steps_sampled": 86400, "update_time_ms": 2.534}, "training_iteration": 72, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.635332345962524, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 86400, "timesteps_total": 86400, "custom_metrics": {}, "iterations_since_restore": 72, "episodes_this_iter": 27, "episode_reward_min": -99.99162556002155, "date": "2025-09-04_17-02-58", "episode_reward_max": -0.7702540579181019, "pid": 3651948, "timestamp": 1756998178, "episode_reward_mean": -91.20535606157559, "time_total_s": 3011.002952814102, "episodes_total": 1752, "episode_len_mean": 48.02}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 3050.5706675052643, "info": {"sample_time_ms": 39458.188, "num_steps_trained": 87600, "grad_time_ms": 371.345, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1113.804443359375, "policy_loss": -0.11882533133029938, "vf_explained_var": -0.7840087413787842, "entropy": 14.758036613464355, "cur_lr": 4.999999873689376e-05, "total_loss": 1113.7073974609375, "kl": 0.014246370643377304}, "load_time_ms": 0.665, "num_steps_sampled": 87600, "update_time_ms": 2.549}, "training_iteration": 73, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.56771469116211, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 87600, "timesteps_total": 87600, "custom_metrics": {}, "iterations_since_restore": 73, "episodes_this_iter": 24, "episode_reward_min": -99.6792457992007, "date": "2025-09-04_17-03-38", "episode_reward_max": -15.576939134117044, "pid": 3651948, "timestamp": 1756998218, "episode_reward_mean": -92.18438998251895, "time_total_s": 3050.5706675052643, "episodes_total": 1776, "episode_len_mean": 48.52}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 3091.4167096614838, "info": {"sample_time_ms": 39521.164, "num_steps_trained": 88800, "grad_time_ms": 371.391, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1172.51708984375, "policy_loss": -0.12917476892471313, "vf_explained_var": -0.7146407961845398, "entropy": 14.7467041015625, "cur_lr": 4.999999873689376e-05, "total_loss": 1172.4105224609375, "kl": 0.014921224676072598}, "load_time_ms": 0.674, "num_steps_sampled": 88800, "update_time_ms": 2.571}, "training_iteration": 74, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.84604215621948, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 88800, "timesteps_total": 88800, "custom_metrics": {}, "iterations_since_restore": 74, "episodes_this_iter": 25, "episode_reward_min": -99.6792457992007, "date": "2025-09-04_17-04-19", "episode_reward_max": -3.999500710162776, "pid": 3651948, "timestamp": 1756998259, "episode_reward_mean": -91.32126179142608, "time_total_s": 3091.4167096614838, "episodes_total": 1801, "episode_len_mean": 48.21}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 3131.535984277725, "info": {"sample_time_ms": 39524.336, "num_steps_trained": 90000, "grad_time_ms": 372.342, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1171.7374267578125, "policy_loss": -0.11464173346757889, "vf_explained_var": -0.7707966566085815, "entropy": 14.599848747253418, "cur_lr": 4.999999873689376e-05, "total_loss": 1171.64208984375, "kl": 0.012648598290979862}, "load_time_ms": 0.679, "num_steps_sampled": 90000, "update_time_ms": 2.587}, "training_iteration": 75, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.119274616241455, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 90000, "timesteps_total": 90000, "custom_metrics": {}, "iterations_since_restore": 75, "episodes_this_iter": 24, "episode_reward_min": -99.63079658416484, "date": "2025-09-04_17-04-59", "episode_reward_max": -3.999500710162776, "pid": 3651948, "timestamp": 1756998299, "episode_reward_mean": -92.15717888342807, "time_total_s": 3131.535984277725, "episodes_total": 1825, "episode_len_mean": 48.44}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 3171.6466183662415, "info": {"sample_time_ms": 39524.771, "num_steps_trained": 91200, "grad_time_ms": 371.805, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1277.7818603515625, "policy_loss": -0.11734982579946518, "vf_explained_var": -0.7334659099578857, "entropy": 14.809412956237793, "cur_lr": 4.999999873689376e-05, "total_loss": 1277.6856689453125, "kl": 0.01380773726850748}, "load_time_ms": 0.674, "num_steps_sampled": 91200, "update_time_ms": 2.622}, "training_iteration": 76, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.110634088516235, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 91200, "timesteps_total": 91200, "custom_metrics": {}, "iterations_since_restore": 76, "episodes_this_iter": 24, "episode_reward_min": -99.63079658416484, "date": "2025-09-04_17-05-39", "episode_reward_max": -3.999500710162776, "pid": 3651948, "timestamp": 1756998339, "episode_reward_mean": -92.27201614128585, "time_total_s": 3171.6466183662415, "episodes_total": 1849, "episode_len_mean": 48.75}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 3211.787467956543, "info": {"sample_time_ms": 39563.643, "num_steps_trained": 92400, "grad_time_ms": 372.018, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1321.55908203125, "policy_loss": -0.12810860574245453, "vf_explained_var": -0.7949018478393555, "entropy": 14.77613353729248, "cur_lr": 4.999999873689376e-05, "total_loss": 1321.45458984375, "kl": 0.015502896159887314}, "load_time_ms": 0.678, "num_steps_sampled": 92400, "update_time_ms": 2.623}, "training_iteration": 77, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.140849590301514, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 92400, "timesteps_total": 92400, "custom_metrics": {}, "iterations_since_restore": 77, "episodes_this_iter": 25, "episode_reward_min": -98.73718296318454, "date": "2025-09-04_17-06-19", "episode_reward_max": -3.999500710162776, "pid": 3651948, "timestamp": 1756998379, "episode_reward_mean": -92.93486778479472, "time_total_s": 3211.787467956543, "episodes_total": 1874, "episode_len_mean": 49.01}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 3252.4975650310516, "info": {"sample_time_ms": 39667.332, "num_steps_trained": 93600, "grad_time_ms": 371.34, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1272.25634765625, "policy_loss": -0.12087935954332352, "vf_explained_var": -0.7115300297737122, "entropy": 14.752005577087402, "cur_lr": 4.999999873689376e-05, "total_loss": 1272.1575927734375, "kl": 0.014506997540593147}, "load_time_ms": 0.675, "num_steps_sampled": 93600, "update_time_ms": 2.629}, "training_iteration": 78, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.71009707450867, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 93600, "timesteps_total": 93600, "custom_metrics": {}, "iterations_since_restore": 78, "episodes_this_iter": 24, "episode_reward_min": -98.6604206333207, "date": "2025-09-04_17-07-00", "episode_reward_max": -55.892082802026835, "pid": 3651948, "timestamp": 1756998420, "episode_reward_mean": -94.11221293511282, "time_total_s": 3252.4975650310516, "episodes_total": 1898, "episode_len_mean": 49.55}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 3293.6832132339478, "info": {"sample_time_ms": 39806.354, "num_steps_trained": 94800, "grad_time_ms": 373.642, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1292.762939453125, "policy_loss": -0.11956813931465149, "vf_explained_var": -0.7240657806396484, "entropy": 14.862645149230957, "cur_lr": 4.999999873689376e-05, "total_loss": 1292.663818359375, "kl": 0.013549041002988815}, "load_time_ms": 0.671, "num_steps_sampled": 94800, "update_time_ms": 2.652}, "training_iteration": 79, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 41.18564820289612, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 94800, "timesteps_total": 94800, "custom_metrics": {}, "iterations_since_restore": 79, "episodes_this_iter": 24, "episode_reward_min": -98.6604206333207, "date": "2025-09-04_17-07-41", "episode_reward_max": -55.892082802026835, "pid": 3651948, "timestamp": 1756998461, "episode_reward_mean": -93.72617925382933, "time_total_s": 3293.6832132339478, "episodes_total": 1922, "episode_len_mean": 49.53}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 3333.982837200165, "info": {"sample_time_ms": 39853.736, "num_steps_trained": 96000, "grad_time_ms": 371.603, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1276.9134521484375, "policy_loss": -0.11261190474033356, "vf_explained_var": -0.7686378955841064, "entropy": 15.086308479309082, "cur_lr": 4.999999873689376e-05, "total_loss": 1276.8209228515625, "kl": 0.013302515260875225}, "load_time_ms": 0.668, "num_steps_sampled": 96000, "update_time_ms": 2.649}, "training_iteration": 80, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.29962396621704, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 96000, "timesteps_total": 96000, "custom_metrics": {}, "iterations_since_restore": 80, "episodes_this_iter": 26, "episode_reward_min": -98.6604206333207, "date": "2025-09-04_17-08-21", "episode_reward_max": -8.733419482830186, "pid": 3651948, "timestamp": 1756998501, "episode_reward_mean": -93.15560433947206, "time_total_s": 3333.982837200165, "episodes_total": 1948, "episode_len_mean": 49.12}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 3374.2282209396362, "info": {"sample_time_ms": 39908.36, "num_steps_trained": 97200, "grad_time_ms": 369.724, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1151.8021240234375, "policy_loss": -0.11964704096317291, "vf_explained_var": -0.6800518035888672, "entropy": 14.558051109313965, "cur_lr": 4.999999873689376e-05, "total_loss": 1151.703125, "kl": 0.013682969845831394}, "load_time_ms": 0.672, "num_steps_sampled": 97200, "update_time_ms": 2.634}, "training_iteration": 81, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.245383739471436, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 97200, "timesteps_total": 97200, "custom_metrics": {}, "iterations_since_restore": 81, "episodes_this_iter": 24, "episode_reward_min": -99.26104626550475, "date": "2025-09-04_17-09-02", "episode_reward_max": -8.733419482830186, "pid": 3651948, "timestamp": 1756998542, "episode_reward_mean": -93.1911542427198, "time_total_s": 3374.2282209396362, "episodes_total": 1972, "episode_len_mean": 49.15}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 3413.907021045685, "info": {"sample_time_ms": 39914.318, "num_steps_trained": 98400, "grad_time_ms": 368.136, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1271.07568359375, "policy_loss": -0.11927060037851334, "vf_explained_var": -0.7267799973487854, "entropy": 14.841487884521484, "cur_lr": 4.999999873689376e-05, "total_loss": 1270.9776611328125, "kl": 0.013944336213171482}, "load_time_ms": 0.672, "num_steps_sampled": 98400, "update_time_ms": 2.608}, "training_iteration": 82, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.678800106048584, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 98400, "timesteps_total": 98400, "custom_metrics": {}, "iterations_since_restore": 82, "episodes_this_iter": 24, "episode_reward_min": -99.26104626550475, "date": "2025-09-04_17-09-41", "episode_reward_max": -8.733419482830186, "pid": 3651948, "timestamp": 1756998581, "episode_reward_mean": -93.12007318425577, "time_total_s": 3413.907021045685, "episodes_total": 1996, "episode_len_mean": 49.15}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 3453.756364107132, "info": {"sample_time_ms": 39943.947, "num_steps_trained": 99600, "grad_time_ms": 366.691, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1233.8282470703125, "policy_loss": -0.12820306420326233, "vf_explained_var": -0.7392103672027588, "entropy": 14.829949378967285, "cur_lr": 4.999999873689376e-05, "total_loss": 1233.7203369140625, "kl": 0.013510401360690594}, "load_time_ms": 0.678, "num_steps_sampled": 99600, "update_time_ms": 2.609}, "training_iteration": 83, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.849343061447144, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 99600, "timesteps_total": 99600, "custom_metrics": {}, "iterations_since_restore": 83, "episodes_this_iter": 25, "episode_reward_min": -99.26104626550475, "date": "2025-09-04_17-10-21", "episode_reward_max": -8.733419482830186, "pid": 3651948, "timestamp": 1756998621, "episode_reward_mean": -92.26810527420496, "time_total_s": 3453.756364107132, "episodes_total": 2021, "episode_len_mean": 48.8}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 3493.5548133850098, "info": {"sample_time_ms": 39839.005, "num_steps_trained": 100800, "grad_time_ms": 366.936, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1215.7237548828125, "policy_loss": -0.11922930181026459, "vf_explained_var": -0.7129600048065186, "entropy": 14.783453941345215, "cur_lr": 4.999999873689376e-05, "total_loss": 1215.6236572265625, "kl": 0.012615455314517021}, "load_time_ms": 0.678, "num_steps_sampled": 100800, "update_time_ms": 2.588}, "training_iteration": 84, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.79844927787781, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 100800, "timesteps_total": 100800, "custom_metrics": {}, "iterations_since_restore": 84, "episodes_this_iter": 24, "episode_reward_min": -99.26104626550475, "date": "2025-09-04_17-11-01", "episode_reward_max": -13.917609120055879, "pid": 3651948, "timestamp": 1756998661, "episode_reward_mean": -92.80197099116748, "time_total_s": 3493.5548133850098, "episodes_total": 2045, "episode_len_mean": 49.17}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 3533.712546348572, "info": {"sample_time_ms": 39842.159, "num_steps_trained": 102000, "grad_time_ms": 367.639, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1278.0333251953125, "policy_loss": -0.1087045967578888, "vf_explained_var": -0.7911555767059326, "entropy": 14.793651580810547, "cur_lr": 4.999999873689376e-05, "total_loss": 1277.946044921875, "kl": 0.014129284769296646}, "load_time_ms": 0.672, "num_steps_sampled": 102000, "update_time_ms": 2.585}, "training_iteration": 85, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.15773296356201, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 102000, "timesteps_total": 102000, "custom_metrics": {}, "iterations_since_restore": 85, "episodes_this_iter": 25, "episode_reward_min": -98.52128822180254, "date": "2025-09-04_17-11-41", "episode_reward_max": -13.917609120055879, "pid": 3651948, "timestamp": 1756998701, "episode_reward_mean": -92.42686715994115, "time_total_s": 3533.712546348572, "episodes_total": 2070, "episode_len_mean": 48.98}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 3573.3178622722626, "info": {"sample_time_ms": 39791.294, "num_steps_trained": 103200, "grad_time_ms": 367.951, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1206.2105712890625, "policy_loss": -0.11693794280290604, "vf_explained_var": -0.7791456580162048, "entropy": 14.77348518371582, "cur_lr": 4.999999873689376e-05, "total_loss": 1206.1171875, "kl": 0.015465127304196358}, "load_time_ms": 0.669, "num_steps_sampled": 103200, "update_time_ms": 2.611}, "training_iteration": 86, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.605315923690796, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 103200, "timesteps_total": 103200, "custom_metrics": {}, "iterations_since_restore": 86, "episodes_this_iter": 25, "episode_reward_min": -98.52128822180254, "date": "2025-09-04_17-12-21", "episode_reward_max": -13.917609120055879, "pid": 3651948, "timestamp": 1756998741, "episode_reward_mean": -91.01069362105486, "time_total_s": 3573.3178622722626, "episodes_total": 2095, "episode_len_mean": 48.48}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 3613.9034507274628, "info": {"sample_time_ms": 39835.727, "num_steps_trained": 104400, "grad_time_ms": 368.041, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1148.5584716796875, "policy_loss": -0.12584802508354187, "vf_explained_var": -0.7068888545036316, "entropy": 14.55543327331543, "cur_lr": 4.999999873689376e-05, "total_loss": 1148.4549560546875, "kl": 0.014751442708075047}, "load_time_ms": 0.67, "num_steps_sampled": 104400, "update_time_ms": 2.583}, "training_iteration": 87, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.585588455200195, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 104400, "timesteps_total": 104400, "custom_metrics": {}, "iterations_since_restore": 87, "episodes_this_iter": 25, "episode_reward_min": -98.72888046726543, "date": "2025-09-04_17-13-02", "episode_reward_max": -25.99462355474143, "pid": 3651948, "timestamp": 1756998782, "episode_reward_mean": -90.95479067292243, "time_total_s": 3613.9034507274628, "episodes_total": 2120, "episode_len_mean": 48.46}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 3654.366242647171, "info": {"sample_time_ms": 39810.132, "num_steps_trained": 105600, "grad_time_ms": 368.977, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1196.8033447265625, "policy_loss": -0.12734419107437134, "vf_explained_var": -0.647044837474823, "entropy": 14.450883865356445, "cur_lr": 4.999999873689376e-05, "total_loss": 1196.6951904296875, "kl": 0.012654243037104607}, "load_time_ms": 0.676, "num_steps_sampled": 105600, "update_time_ms": 2.584}, "training_iteration": 88, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.46279191970825, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 105600, "timesteps_total": 105600, "custom_metrics": {}, "iterations_since_restore": 88, "episodes_this_iter": 24, "episode_reward_min": -98.77030807707597, "date": "2025-09-04_17-13-42", "episode_reward_max": -5.722962107342848, "pid": 3651948, "timestamp": 1756998822, "episode_reward_mean": -90.80268153204094, "time_total_s": 3654.366242647171, "episodes_total": 2144, "episode_len_mean": 48.24}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 3694.164056777954, "info": {"sample_time_ms": 39671.88, "num_steps_trained": 106800, "grad_time_ms": 368.472, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1234.7669677734375, "policy_loss": -0.12505359947681427, "vf_explained_var": -0.7778708338737488, "entropy": 14.69221305847168, "cur_lr": 4.999999873689376e-05, "total_loss": 1234.66162109375, "kl": 0.012969114817678928}, "load_time_ms": 0.685, "num_steps_sampled": 106800, "update_time_ms": 2.551}, "training_iteration": 89, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.79781413078308, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 106800, "timesteps_total": 106800, "custom_metrics": {}, "iterations_since_restore": 89, "episodes_this_iter": 26, "episode_reward_min": -99.54022066114896, "date": "2025-09-04_17-14-22", "episode_reward_max": -5.722962107342848, "pid": 3651948, "timestamp": 1756998862, "episode_reward_mean": -89.86875535557246, "time_total_s": 3694.164056777954, "episodes_total": 2170, "episode_len_mean": 47.89}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 3733.7502102851868, "info": {"sample_time_ms": 39599.99, "num_steps_trained": 108000, "grad_time_ms": 369.036, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1285.6556396484375, "policy_loss": -0.12674559652805328, "vf_explained_var": -0.8289951682090759, "entropy": 14.869439125061035, "cur_lr": 4.999999873689376e-05, "total_loss": 1285.5496826171875, "kl": 0.01376924104988575}, "load_time_ms": 0.685, "num_steps_sampled": 108000, "update_time_ms": 2.537}, "training_iteration": 90, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.586153507232666, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 108000, "timesteps_total": 108000, "custom_metrics": {}, "iterations_since_restore": 90, "episodes_this_iter": 24, "episode_reward_min": -99.54022066114896, "date": "2025-09-04_17-15-01", "episode_reward_max": -5.722962107342848, "pid": 3651948, "timestamp": 1756998901, "episode_reward_mean": -91.13390358708877, "time_total_s": 3733.7502102851868, "episodes_total": 2194, "episode_len_mean": 48.39}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 3774.2825310230255, "info": {"sample_time_ms": 39628.094, "num_steps_trained": 109200, "grad_time_ms": 369.618, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1224.0098876953125, "policy_loss": -0.10955886542797089, "vf_explained_var": -0.7773178815841675, "entropy": 14.64888858795166, "cur_lr": 4.999999873689376e-05, "total_loss": 1223.9169921875, "kl": 0.011099190451204777}, "load_time_ms": 0.702, "num_steps_sampled": 109200, "update_time_ms": 2.519}, "training_iteration": 91, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.532320737838745, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 109200, "timesteps_total": 109200, "custom_metrics": {}, "iterations_since_restore": 91, "episodes_this_iter": 25, "episode_reward_min": -99.54022066114896, "date": "2025-09-04_17-15-42", "episode_reward_max": -5.722962107342848, "pid": 3651948, "timestamp": 1756998942, "episode_reward_mean": -91.20847519918264, "time_total_s": 3774.2825310230255, "episodes_total": 2219, "episode_len_mean": 48.4}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 3814.0616085529327, "info": {"sample_time_ms": 39634.879, "num_steps_trained": 110400, "grad_time_ms": 372.838, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1189.8040771484375, "policy_loss": -0.12425579130649567, "vf_explained_var": -0.7224305272102356, "entropy": 14.687872886657715, "cur_lr": 4.999999873689376e-05, "total_loss": 1189.700927734375, "kl": 0.013974593952298164}, "load_time_ms": 0.705, "num_steps_sampled": 110400, "update_time_ms": 2.504}, "training_iteration": 92, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.77907752990723, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 110400, "timesteps_total": 110400, "custom_metrics": {}, "iterations_since_restore": 92, "episodes_this_iter": 26, "episode_reward_min": -99.54022066114896, "date": "2025-09-04_17-16-22", "episode_reward_max": 1.4474787914261587, "pid": 3651948, "timestamp": 1756998982, "episode_reward_mean": -89.48766277537257, "time_total_s": 3814.0616085529327, "episodes_total": 2245, "episode_len_mean": 47.8}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 3854.6090116500854, "info": {"sample_time_ms": 39702.751, "num_steps_trained": 111600, "grad_time_ms": 374.732, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1208.7021484375, "policy_loss": -0.11913042515516281, "vf_explained_var": -0.6939985752105713, "entropy": 14.28215503692627, "cur_lr": 4.999999873689376e-05, "total_loss": 1208.60400390625, "kl": 0.013752754777669907}, "load_time_ms": 0.7, "num_steps_sampled": 111600, "update_time_ms": 2.531}, "training_iteration": 93, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.54740309715271, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 111600, "timesteps_total": 111600, "custom_metrics": {}, "iterations_since_restore": 93, "episodes_this_iter": 24, "episode_reward_min": -99.36198879809118, "date": "2025-09-04_17-17-02", "episode_reward_max": 1.4474787914261587, "pid": 3651948, "timestamp": 1756999022, "episode_reward_mean": -90.58949901698992, "time_total_s": 3854.6090116500854, "episodes_total": 2269, "episode_len_mean": 48.22}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 3894.819942712784, "info": {"sample_time_ms": 39744.693, "num_steps_trained": 112800, "grad_time_ms": 374.001, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1184.99560546875, "policy_loss": -0.12463506311178207, "vf_explained_var": -0.643320620059967, "entropy": 14.60263442993164, "cur_lr": 4.999999873689376e-05, "total_loss": 1184.8944091796875, "kl": 0.015387635678052902}, "load_time_ms": 0.691, "num_steps_sampled": 112800, "update_time_ms": 2.53}, "training_iteration": 94, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.210931062698364, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 112800, "timesteps_total": 112800, "custom_metrics": {}, "iterations_since_restore": 94, "episodes_this_iter": 25, "episode_reward_min": -99.36198879809118, "date": "2025-09-04_17-17-43", "episode_reward_max": 1.4474787914261587, "pid": 3651948, "timestamp": 1756999063, "episode_reward_mean": -89.95456546353162, "time_total_s": 3894.819942712784, "episodes_total": 2294, "episode_len_mean": 48.04}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 3934.89609003067, "info": {"sample_time_ms": 39737.281, "num_steps_trained": 114000, "grad_time_ms": 373.233, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1267.3551025390625, "policy_loss": -0.12714111804962158, "vf_explained_var": -0.7729015350341797, "entropy": 14.54859447479248, "cur_lr": 4.999999873689376e-05, "total_loss": 1267.2496337890625, "kl": 0.014290733262896538}, "load_time_ms": 0.689, "num_steps_sampled": 114000, "update_time_ms": 2.541}, "training_iteration": 95, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.07614731788635, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 114000, "timesteps_total": 114000, "custom_metrics": {}, "iterations_since_restore": 95, "episodes_this_iter": 25, "episode_reward_min": -99.36198879809118, "date": "2025-09-04_17-18-23", "episode_reward_max": 1.4474787914261587, "pid": 3651948, "timestamp": 1756999103, "episode_reward_mean": -89.21487080403091, "time_total_s": 3934.89609003067, "episodes_total": 2319, "episode_len_mean": 47.81}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 3974.9890925884247, "info": {"sample_time_ms": 39785.839, "num_steps_trained": 115200, "grad_time_ms": 373.399, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1199.9796142578125, "policy_loss": -0.12493264675140381, "vf_explained_var": -0.7098046541213989, "entropy": 14.439332962036133, "cur_lr": 4.999999873689376e-05, "total_loss": 1199.87451171875, "kl": 0.013081979006528854}, "load_time_ms": 0.695, "num_steps_sampled": 115200, "update_time_ms": 2.574}, "training_iteration": 96, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.09300255775452, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 115200, "timesteps_total": 115200, "custom_metrics": {}, "iterations_since_restore": 96, "episodes_this_iter": 25, "episode_reward_min": -99.36198879809118, "date": "2025-09-04_17-19-03", "episode_reward_max": -28.275172311855314, "pid": 3651948, "timestamp": 1756999143, "episode_reward_mean": -90.65294534009193, "time_total_s": 3974.9890925884247, "episodes_total": 2344, "episode_len_mean": 48.51}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 4014.6931591033936, "info": {"sample_time_ms": 39698.153, "num_steps_trained": 116400, "grad_time_ms": 373.004, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1210.350341796875, "policy_loss": -0.11625361442565918, "vf_explained_var": -0.7325482368469238, "entropy": 14.540125846862793, "cur_lr": 4.999999873689376e-05, "total_loss": 1210.2540283203125, "kl": 0.013076062314212322}, "load_time_ms": 0.68, "num_steps_sampled": 116400, "update_time_ms": 2.558}, "training_iteration": 97, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.70406651496887, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 116400, "timesteps_total": 116400, "custom_metrics": {}, "iterations_since_restore": 97, "episodes_this_iter": 24, "episode_reward_min": -97.95559873759191, "date": "2025-09-04_17-19-43", "episode_reward_max": -35.95092867650534, "pid": 3651948, "timestamp": 1756999183, "episode_reward_mean": -91.17967484303084, "time_total_s": 4014.6931591033936, "episodes_total": 2368, "episode_len_mean": 48.85}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 4055.768358230591, "info": {"sample_time_ms": 39759.339, "num_steps_trained": 117600, "grad_time_ms": 373.009, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1190.3453369140625, "policy_loss": -0.11981771886348724, "vf_explained_var": -0.6935294270515442, "entropy": 14.405661582946777, "cur_lr": 4.999999873689376e-05, "total_loss": 1190.245849609375, "kl": 0.013433661311864853}, "load_time_ms": 0.685, "num_steps_sampled": 117600, "update_time_ms": 2.552}, "training_iteration": 98, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 41.075199127197266, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 117600, "timesteps_total": 117600, "custom_metrics": {}, "iterations_since_restore": 98, "episodes_this_iter": 25, "episode_reward_min": -97.95559873759191, "date": "2025-09-04_17-20-24", "episode_reward_max": 0.5107333925751831, "pid": 3651948, "timestamp": 1756999224, "episode_reward_mean": -89.58005055925892, "time_total_s": 4055.768358230591, "episodes_total": 2393, "episode_len_mean": 48.15}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 4095.5292184352875, "info": {"sample_time_ms": 39757.793, "num_steps_trained": 118800, "grad_time_ms": 370.868, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1296.869140625, "policy_loss": -0.1273031383752823, "vf_explained_var": -0.7066032886505127, "entropy": 14.593509674072266, "cur_lr": 4.999999873689376e-05, "total_loss": 1296.7630615234375, "kl": 0.013859516941010952}, "load_time_ms": 0.679, "num_steps_sampled": 118800, "update_time_ms": 2.572}, "training_iteration": 99, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.760860204696655, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 118800, "timesteps_total": 118800, "custom_metrics": {}, "iterations_since_restore": 99, "episodes_this_iter": 24, "episode_reward_min": -97.95559873759191, "date": "2025-09-04_17-21-03", "episode_reward_max": 0.5107333925751831, "pid": 3651948, "timestamp": 1756999263, "episode_reward_mean": -90.90190257334996, "time_total_s": 4095.5292184352875, "episodes_total": 2417, "episode_len_mean": 48.75}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 4135.280607700348, "info": {"sample_time_ms": 39771.754, "num_steps_trained": 120000, "grad_time_ms": 373.397, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1311.7376708984375, "policy_loss": -0.13152579963207245, "vf_explained_var": -0.6952612996101379, "entropy": 14.473356246948242, "cur_lr": 4.999999873689376e-05, "total_loss": 1311.62744140625, "kl": 0.014029532670974731}, "load_time_ms": 0.689, "num_steps_sampled": 120000, "update_time_ms": 2.588}, "training_iteration": 100, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.751389265060425, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 120000, "timesteps_total": 120000, "custom_metrics": {}, "iterations_since_restore": 100, "episodes_this_iter": 25, "episode_reward_min": -98.5273936104996, "date": "2025-09-04_17-21-43", "episode_reward_max": 0.5107333925751831, "pid": 3651948, "timestamp": 1756999303, "episode_reward_mean": -90.455652480023, "time_total_s": 4135.280607700348, "episodes_total": 2442, "episode_len_mean": 48.5}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 4175.344631195068, "info": {"sample_time_ms": 39722.412, "num_steps_trained": 121200, "grad_time_ms": 375.911, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1326.9588623046875, "policy_loss": -0.14080630242824554, "vf_explained_var": -0.7763766050338745, "entropy": 14.496570587158203, "cur_lr": 4.999999873689376e-05, "total_loss": 1326.8421630859375, "kl": 0.015901949256658554}, "load_time_ms": 0.675, "num_steps_sampled": 121200, "update_time_ms": 2.616}, "training_iteration": 101, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.06402349472046, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 121200, "timesteps_total": 121200, "custom_metrics": {}, "iterations_since_restore": 101, "episodes_this_iter": 26, "episode_reward_min": -98.5273936104996, "date": "2025-09-04_17-22-23", "episode_reward_max": 6.000664555683574, "pid": 3651948, "timestamp": 1756999343, "episode_reward_mean": -88.72845558193077, "time_total_s": 4175.344631195068, "episodes_total": 2468, "episode_len_mean": 47.76}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 4215.645416736603, "info": {"sample_time_ms": 39774.876, "num_steps_trained": 122400, "grad_time_ms": 375.573, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1194.6558837890625, "policy_loss": -0.1253173053264618, "vf_explained_var": -0.7658072710037231, "entropy": 14.514446258544922, "cur_lr": 4.999999873689376e-05, "total_loss": 1194.5496826171875, "kl": 0.012578372843563557}, "load_time_ms": 0.679, "num_steps_sampled": 122400, "update_time_ms": 2.664}, "training_iteration": 102, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.300785541534424, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 122400, "timesteps_total": 122400, "custom_metrics": {}, "iterations_since_restore": 102, "episodes_this_iter": 25, "episode_reward_min": -98.5273936104996, "date": "2025-09-04_17-23-04", "episode_reward_max": 6.000664555683574, "pid": 3651948, "timestamp": 1756999384, "episode_reward_mean": -89.66162941043045, "time_total_s": 4215.645416736603, "episodes_total": 2493, "episode_len_mean": 48.25}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 4255.7618935108185, "info": {"sample_time_ms": 39733.237, "num_steps_trained": 123600, "grad_time_ms": 374.02, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1156.4312744140625, "policy_loss": -0.1374259740114212, "vf_explained_var": -0.6683142781257629, "entropy": 14.21220874786377, "cur_lr": 4.999999873689376e-05, "total_loss": 1156.3155517578125, "kl": 0.014314512722194195}, "load_time_ms": 0.678, "num_steps_sampled": 123600, "update_time_ms": 2.677}, "training_iteration": 103, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.1164767742157, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 123600, "timesteps_total": 123600, "custom_metrics": {}, "iterations_since_restore": 103, "episodes_this_iter": 25, "episode_reward_min": -98.5273936104996, "date": "2025-09-04_17-23-44", "episode_reward_max": 6.000664555683574, "pid": 3651948, "timestamp": 1756999424, "episode_reward_mean": -88.23683398251121, "time_total_s": 4255.7618935108185, "episodes_total": 2518, "episode_len_mean": 47.59}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 4297.668802499771, "info": {"sample_time_ms": 39901.639, "num_steps_trained": 124800, "grad_time_ms": 375.174, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1237.98388671875, "policy_loss": -0.12278148531913757, "vf_explained_var": -0.6365931630134583, "entropy": 14.33677864074707, "cur_lr": 4.999999873689376e-05, "total_loss": 1237.88037109375, "kl": 0.012673246674239635}, "load_time_ms": 0.697, "num_steps_sampled": 124800, "update_time_ms": 2.728}, "training_iteration": 104, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 41.90690898895264, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 124800, "timesteps_total": 124800, "custom_metrics": {}, "iterations_since_restore": 104, "episodes_this_iter": 28, "episode_reward_min": -97.46309625411676, "date": "2025-09-04_17-24-26", "episode_reward_max": 6.000664555683574, "pid": 3651948, "timestamp": 1756999466, "episode_reward_mean": -84.16640483824541, "time_total_s": 4297.668802499771, "episodes_total": 2546, "episode_len_mean": 45.78}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 4337.651317119598, "info": {"sample_time_ms": 39889.857, "num_steps_trained": 126000, "grad_time_ms": 377.575, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1278.95947265625, "policy_loss": -0.12983882427215576, "vf_explained_var": -0.6369035840034485, "entropy": 14.396255493164062, "cur_lr": 4.999999873689376e-05, "total_loss": 1278.85009765625, "kl": 0.013471391052007675}, "load_time_ms": 0.715, "num_steps_sampled": 126000, "update_time_ms": 2.716}, "training_iteration": 105, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.98251461982727, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 126000, "timesteps_total": 126000, "custom_metrics": {}, "iterations_since_restore": 105, "episodes_this_iter": 25, "episode_reward_min": -97.45426535558042, "date": "2025-09-04_17-25-06", "episode_reward_max": 6.000003544694097, "pid": 3651948, "timestamp": 1756999506, "episode_reward_mean": -85.2422938899056, "time_total_s": 4337.651317119598, "episodes_total": 2571, "episode_len_mean": 46.3}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 4378.0724902153015, "info": {"sample_time_ms": 39924.688, "num_steps_trained": 127200, "grad_time_ms": 375.586, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1178.966552734375, "policy_loss": -0.1314598172903061, "vf_explained_var": -0.6940706968307495, "entropy": 14.413055419921875, "cur_lr": 4.999999873689376e-05, "total_loss": 1178.8572998046875, "kl": 0.014483694918453693}, "load_time_ms": 0.708, "num_steps_sampled": 127200, "update_time_ms": 2.682}, "training_iteration": 106, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.421173095703125, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 127200, "timesteps_total": 127200, "custom_metrics": {}, "iterations_since_restore": 106, "episodes_this_iter": 28, "episode_reward_min": -97.15961541343583, "date": "2025-09-04_17-25-46", "episode_reward_max": 6.000003544694097, "pid": 3651948, "timestamp": 1756999546, "episode_reward_mean": -82.70880042113075, "time_total_s": 4378.0724902153015, "episodes_total": 2599, "episode_len_mean": 45.08}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 4419.78364610672, "info": {"sample_time_ms": 40123.747, "num_steps_trained": 128400, "grad_time_ms": 377.182, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1233.5357666015625, "policy_loss": -0.12721286714076996, "vf_explained_var": -0.7277848720550537, "entropy": 14.43802261352539, "cur_lr": 4.999999873689376e-05, "total_loss": 1233.4302978515625, "kl": 0.014336716383695602}, "load_time_ms": 0.729, "num_steps_sampled": 128400, "update_time_ms": 2.697}, "training_iteration": 107, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 41.71115589141846, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 128400, "timesteps_total": 128400, "custom_metrics": {}, "iterations_since_restore": 107, "episodes_this_iter": 25, "episode_reward_min": -98.2978558218741, "date": "2025-09-04_17-26-28", "episode_reward_max": 2.1440509234017577, "pid": 3651948, "timestamp": 1756999588, "episode_reward_mean": -84.86409343511512, "time_total_s": 4419.78364610672, "episodes_total": 2624, "episode_len_mean": 46.06}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 4460.092601776123, "info": {"sample_time_ms": 40046.852, "num_steps_trained": 129600, "grad_time_ms": 377.472, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1257.62548828125, "policy_loss": -0.12686073780059814, "vf_explained_var": -0.6250575184822083, "entropy": 14.22714614868164, "cur_lr": 4.999999873689376e-05, "total_loss": 1257.5218505859375, "kl": 0.015301553532481194}, "load_time_ms": 0.724, "num_steps_sampled": 129600, "update_time_ms": 2.693}, "training_iteration": 108, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.308955669403076, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 129600, "timesteps_total": 129600, "custom_metrics": {}, "iterations_since_restore": 108, "episodes_this_iter": 25, "episode_reward_min": -98.2978558218741, "date": "2025-09-04_17-27-08", "episode_reward_max": 3.0193488702176747, "pid": 3651948, "timestamp": 1756999628, "episode_reward_mean": -86.37986970879447, "time_total_s": 4460.092601776123, "episodes_total": 2649, "episode_len_mean": 46.67}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 4500.163435935974, "info": {"sample_time_ms": 40077.132, "num_steps_trained": 130800, "grad_time_ms": 378.158, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1223.0604248046875, "policy_loss": -0.13701944053173065, "vf_explained_var": -0.6192005276679993, "entropy": 14.382627487182617, "cur_lr": 4.999999873689376e-05, "total_loss": 1222.9482421875, "kl": 0.016335275024175644}, "load_time_ms": 0.723, "num_steps_sampled": 130800, "update_time_ms": 2.697}, "training_iteration": 109, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.070834159851074, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 130800, "timesteps_total": 130800, "custom_metrics": {}, "iterations_since_restore": 109, "episodes_this_iter": 26, "episode_reward_min": -98.2978558218741, "date": "2025-09-04_17-27-48", "episode_reward_max": 3.0193488702176747, "pid": 3651948, "timestamp": 1756999668, "episode_reward_mean": -86.45667733670398, "time_total_s": 4500.163435935974, "episodes_total": 2675, "episode_len_mean": 46.77}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 4541.299084186554, "info": {"sample_time_ms": 40215.232, "num_steps_trained": 132000, "grad_time_ms": 378.476, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1271.205322265625, "policy_loss": -0.13974149525165558, "vf_explained_var": -0.5736344456672668, "entropy": 14.173750877380371, "cur_lr": 4.999999873689376e-05, "total_loss": 1271.0859375, "kl": 0.01340469066053629}, "load_time_ms": 0.725, "num_steps_sampled": 132000, "update_time_ms": 2.696}, "training_iteration": 110, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 41.135648250579834, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 132000, "timesteps_total": 132000, "custom_metrics": {}, "iterations_since_restore": 110, "episodes_this_iter": 27, "episode_reward_min": -99.64931377321552, "date": "2025-09-04_17-28-29", "episode_reward_max": 3.0193488702176747, "pid": 3651948, "timestamp": 1756999709, "episode_reward_mean": -85.65938928896858, "time_total_s": 4541.299084186554, "episodes_total": 2702, "episode_len_mean": 46.35}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 4581.27139878273, "info": {"sample_time_ms": 40206.484, "num_steps_trained": 133200, "grad_time_ms": 378.024, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1239.5079345703125, "policy_loss": -0.12589646875858307, "vf_explained_var": -0.6094751954078674, "entropy": 14.06795883178711, "cur_lr": 4.999999873689376e-05, "total_loss": 1239.40234375, "kl": 0.013416077941656113}, "load_time_ms": 0.727, "num_steps_sampled": 133200, "update_time_ms": 2.708}, "training_iteration": 111, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.97231459617615, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 133200, "timesteps_total": 133200, "custom_metrics": {}, "iterations_since_restore": 111, "episodes_this_iter": 24, "episode_reward_min": -99.64931377321552, "date": "2025-09-04_17-29-10", "episode_reward_max": 3.0193488702176747, "pid": 3651948, "timestamp": 1756999750, "episode_reward_mean": -85.355216547187, "time_total_s": 4581.27139878273, "episodes_total": 2726, "episode_len_mean": 46.29}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 4621.051965236664, "info": {"sample_time_ms": 40156.241, "num_steps_trained": 134400, "grad_time_ms": 376.258, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1339.635009765625, "policy_loss": -0.1412985920906067, "vf_explained_var": -0.592364490032196, "entropy": 14.377864837646484, "cur_lr": 4.999999873689376e-05, "total_loss": 1339.51513671875, "kl": 0.014054707251489162}, "load_time_ms": 0.718, "num_steps_sampled": 134400, "update_time_ms": 2.694}, "training_iteration": 112, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.780566453933716, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 134400, "timesteps_total": 134400, "custom_metrics": {}, "iterations_since_restore": 112, "episodes_this_iter": 25, "episode_reward_min": -99.64931377321552, "date": "2025-09-04_17-29-49", "episode_reward_max": -1.849715851617404, "pid": 3651948, "timestamp": 1756999789, "episode_reward_mean": -86.26141017081436, "time_total_s": 4621.051965236664, "episodes_total": 2751, "episode_len_mean": 46.8}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 4661.2936680316925, "info": {"sample_time_ms": 40167.92, "num_steps_trained": 135600, "grad_time_ms": 377.197, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1229.363037109375, "policy_loss": -0.1360459327697754, "vf_explained_var": -0.5663503408432007, "entropy": 14.140381813049316, "cur_lr": 4.999999873689376e-05, "total_loss": 1229.2481689453125, "kl": 0.013758447952568531}, "load_time_ms": 0.727, "num_steps_sampled": 135600, "update_time_ms": 2.661}, "training_iteration": 113, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.24170279502869, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 135600, "timesteps_total": 135600, "custom_metrics": {}, "iterations_since_restore": 113, "episodes_this_iter": 28, "episode_reward_min": -99.64931377321552, "date": "2025-09-04_17-30-30", "episode_reward_max": -1.849715851617404, "pid": 3651948, "timestamp": 1756999830, "episode_reward_mean": -84.13588032368352, "time_total_s": 4661.2936680316925, "episodes_total": 2779, "episode_len_mean": 45.91}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 4701.200223684311, "info": {"sample_time_ms": 39968.296, "num_steps_trained": 136800, "grad_time_ms": 376.908, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1215.121337890625, "policy_loss": -0.1374468207359314, "vf_explained_var": -0.622351884841919, "entropy": 14.042511940002441, "cur_lr": 4.999999873689376e-05, "total_loss": 1215.00537109375, "kl": 0.014193039387464523}, "load_time_ms": 0.713, "num_steps_sampled": 136800, "update_time_ms": 2.603}, "training_iteration": 114, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.90655565261841, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 136800, "timesteps_total": 136800, "custom_metrics": {}, "iterations_since_restore": 114, "episodes_this_iter": 27, "episode_reward_min": -97.28639060982673, "date": "2025-09-04_17-31-09", "episode_reward_max": 2.000004349898961, "pid": 3651948, "timestamp": 1756999869, "episode_reward_mean": -84.21676647970006, "time_total_s": 4701.200223684311, "episodes_total": 2806, "episode_len_mean": 46.08}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 4740.942209243774, "info": {"sample_time_ms": 39946.04, "num_steps_trained": 138000, "grad_time_ms": 375.175, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1228.0399169921875, "policy_loss": -0.1427999883890152, "vf_explained_var": -0.6482807993888855, "entropy": 14.210469245910645, "cur_lr": 4.999999873689376e-05, "total_loss": 1227.91748046875, "kl": 0.013438764959573746}, "load_time_ms": 0.719, "num_steps_sampled": 138000, "update_time_ms": 2.585}, "training_iteration": 115, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.7419855594635, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 138000, "timesteps_total": 138000, "custom_metrics": {}, "iterations_since_restore": 115, "episodes_this_iter": 25, "episode_reward_min": -97.46278423272653, "date": "2025-09-04_17-31-49", "episode_reward_max": 2.000004349898961, "pid": 3651948, "timestamp": 1756999909, "episode_reward_mean": -84.02052648087194, "time_total_s": 4740.942209243774, "episodes_total": 2831, "episode_len_mean": 46.04}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 4780.744037866592, "info": {"sample_time_ms": 39880.803, "num_steps_trained": 139200, "grad_time_ms": 378.501, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1398.448974609375, "policy_loss": -0.13323861360549927, "vf_explained_var": -0.7571742534637451, "entropy": 14.20920467376709, "cur_lr": 4.999999873689376e-05, "total_loss": 1398.337646484375, "kl": 0.014472413808107376}, "load_time_ms": 0.736, "num_steps_sampled": 139200, "update_time_ms": 2.536}, "training_iteration": 116, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.80182862281799, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 139200, "timesteps_total": 139200, "custom_metrics": {}, "iterations_since_restore": 116, "episodes_this_iter": 24, "episode_reward_min": -97.46278423272653, "date": "2025-09-04_17-32-29", "episode_reward_max": 2.000004349898961, "pid": 3651948, "timestamp": 1756999949, "episode_reward_mean": -84.00878538868815, "time_total_s": 4780.744037866592, "episodes_total": 2855, "episode_len_mean": 46.0}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 4820.50555896759, "info": {"sample_time_ms": 39688.761, "num_steps_trained": 140400, "grad_time_ms": 375.63, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1149.515380859375, "policy_loss": -0.12735703587532043, "vf_explained_var": -0.6808863878250122, "entropy": 13.910858154296875, "cur_lr": 4.999999873689376e-05, "total_loss": 1149.405517578125, "kl": 0.01153584010899067}, "load_time_ms": 0.717, "num_steps_sampled": 140400, "update_time_ms": 2.537}, "training_iteration": 117, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.761521100997925, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 140400, "timesteps_total": 140400, "custom_metrics": {}, "iterations_since_restore": 117, "episodes_this_iter": 27, "episode_reward_min": -97.46278423272653, "date": "2025-09-04_17-33-09", "episode_reward_max": -2.9198034618987947, "pid": 3651948, "timestamp": 1756999989, "episode_reward_mean": -86.9114394525108, "time_total_s": 4820.50555896759, "episodes_total": 2882, "episode_len_mean": 47.45}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 4860.424870014191, "info": {"sample_time_ms": 39652.325, "num_steps_trained": 141600, "grad_time_ms": 373.139, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1287.9305419921875, "policy_loss": -0.13839703798294067, "vf_explained_var": -0.6928651332855225, "entropy": 14.062501907348633, "cur_lr": 4.999999873689376e-05, "total_loss": 1287.81640625, "kl": 0.01598420925438404}, "load_time_ms": 0.713, "num_steps_sampled": 141600, "update_time_ms": 2.516}, "training_iteration": 118, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.91931104660034, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 141600, "timesteps_total": 141600, "custom_metrics": {}, "iterations_since_restore": 118, "episodes_this_iter": 26, "episode_reward_min": -97.46278423272653, "date": "2025-09-04_17-33-49", "episode_reward_max": -2.9198034618987947, "pid": 3651948, "timestamp": 1757000029, "episode_reward_mean": -86.97735539907637, "time_total_s": 4860.424870014191, "episodes_total": 2908, "episode_len_mean": 47.41}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 4901.108229875565, "info": {"sample_time_ms": 39714.083, "num_steps_trained": 142800, "grad_time_ms": 372.653, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1239.31298828125, "policy_loss": -0.14445358514785767, "vf_explained_var": -0.7411688566207886, "entropy": 14.128597259521484, "cur_lr": 4.999999873689376e-05, "total_loss": 1239.1910400390625, "kl": 0.014916145242750645}, "load_time_ms": 0.712, "num_steps_sampled": 142800, "update_time_ms": 2.522}, "training_iteration": 119, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.6833598613739, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 142800, "timesteps_total": 142800, "custom_metrics": {}, "iterations_since_restore": 119, "episodes_this_iter": 27, "episode_reward_min": -97.12466194862068, "date": "2025-09-04_17-34-29", "episode_reward_max": 7.805300910529125, "pid": 3651948, "timestamp": 1757000069, "episode_reward_mean": -84.39691080163124, "time_total_s": 4901.108229875565, "episodes_total": 2935, "episode_len_mean": 46.3}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 4941.155441761017, "info": {"sample_time_ms": 39607.559, "num_steps_trained": 144000, "grad_time_ms": 370.35, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1206.25048828125, "policy_loss": -0.12830425798892975, "vf_explained_var": -0.652219831943512, "entropy": 14.344528198242188, "cur_lr": 4.999999873689376e-05, "total_loss": 1206.142333984375, "kl": 0.01337106991559267}, "load_time_ms": 0.712, "num_steps_sampled": 144000, "update_time_ms": 2.504}, "training_iteration": 120, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.04721188545227, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 144000, "timesteps_total": 144000, "custom_metrics": {}, "iterations_since_restore": 120, "episodes_this_iter": 26, "episode_reward_min": -97.12466194862068, "date": "2025-09-04_17-35-09", "episode_reward_max": 7.805300910529125, "pid": 3651948, "timestamp": 1757000109, "episode_reward_mean": -82.81832333655902, "time_total_s": 4941.155441761017, "episodes_total": 2961, "episode_len_mean": 45.62}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 4981.022238731384, "info": {"sample_time_ms": 39599.494, "num_steps_trained": 145200, "grad_time_ms": 367.911, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1186.442138671875, "policy_loss": -0.1330496370792389, "vf_explained_var": -0.6136595606803894, "entropy": 14.419291496276855, "cur_lr": 4.999999873689376e-05, "total_loss": 1186.3292236328125, "kl": 0.013351598754525185}, "load_time_ms": 0.701, "num_steps_sampled": 145200, "update_time_ms": 2.481}, "training_iteration": 121, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.86679697036743, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 145200, "timesteps_total": 145200, "custom_metrics": {}, "iterations_since_restore": 121, "episodes_this_iter": 26, "episode_reward_min": -97.12466194862068, "date": "2025-09-04_17-35-49", "episode_reward_max": 7.805300910529125, "pid": 3651948, "timestamp": 1757000149, "episode_reward_mean": -81.44347113403737, "time_total_s": 4981.022238731384, "episodes_total": 2987, "episode_len_mean": 45.06}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 5020.848915338516, "info": {"sample_time_ms": 39604.063, "num_steps_trained": 146400, "grad_time_ms": 367.975, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1180.305419921875, "policy_loss": -0.13235728442668915, "vf_explained_var": -0.553139865398407, "entropy": 14.17344856262207, "cur_lr": 4.999999873689376e-05, "total_loss": 1180.1942138671875, "kl": 0.013966232538223267}, "load_time_ms": 0.704, "num_steps_sampled": 146400, "update_time_ms": 2.469}, "training_iteration": 122, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.82667660713196, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 146400, "timesteps_total": 146400, "custom_metrics": {}, "iterations_since_restore": 122, "episodes_this_iter": 27, "episode_reward_min": -96.53858566931319, "date": "2025-09-04_17-36-29", "episode_reward_max": 7.805300910529125, "pid": 3651948, "timestamp": 1757000189, "episode_reward_mean": -82.05372274573158, "time_total_s": 5020.848915338516, "episodes_total": 3014, "episode_len_mean": 45.31}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 5060.564336061478, "info": {"sample_time_ms": 39551.678, "num_steps_trained": 147600, "grad_time_ms": 367.708, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1200.6090087890625, "policy_loss": -0.14686201512813568, "vf_explained_var": -0.4810258448123932, "entropy": 14.21179485321045, "cur_lr": 4.999999873689376e-05, "total_loss": 1200.483154296875, "kl": 0.01375659555196762}, "load_time_ms": 0.693, "num_steps_sampled": 147600, "update_time_ms": 2.472}, "training_iteration": 123, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.715420722961426, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 147600, "timesteps_total": 147600, "custom_metrics": {}, "iterations_since_restore": 123, "episodes_this_iter": 26, "episode_reward_min": -96.53858566931319, "date": "2025-09-04_17-37-09", "episode_reward_max": -1.4890587415309486, "pid": 3651948, "timestamp": 1757000229, "episode_reward_mean": -83.17987228195717, "time_total_s": 5060.564336061478, "episodes_total": 3040, "episode_len_mean": 45.85}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 5100.991299629211, "info": {"sample_time_ms": 39605.376, "num_steps_trained": 148800, "grad_time_ms": 365.949, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1039.895751953125, "policy_loss": -0.14113567769527435, "vf_explained_var": -0.5060775876045227, "entropy": 13.987238883972168, "cur_lr": 4.999999873689376e-05, "total_loss": 1039.7767333984375, "kl": 0.014459229074418545}, "load_time_ms": 0.692, "num_steps_sampled": 148800, "update_time_ms": 2.55}, "training_iteration": 124, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.426963567733765, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 148800, "timesteps_total": 148800, "custom_metrics": {}, "iterations_since_restore": 124, "episodes_this_iter": 25, "episode_reward_min": -96.53858566931319, "date": "2025-09-04_17-37-49", "episode_reward_max": -1.4890587415309486, "pid": 3651948, "timestamp": 1757000269, "episode_reward_mean": -82.2096470112448, "time_total_s": 5100.991299629211, "episodes_total": 3065, "episode_len_mean": 45.5}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 5140.684392690659, "info": {"sample_time_ms": 39602.107, "num_steps_trained": 150000, "grad_time_ms": 364.369, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 1045.89404296875, "policy_loss": -0.1301415115594864, "vf_explained_var": -0.6093275547027588, "entropy": 14.328471183776855, "cur_lr": 4.999999873689376e-05, "total_loss": 1045.78515625, "kl": 0.013961934484541416}, "load_time_ms": 0.67, "num_steps_sampled": 150000, "update_time_ms": 2.543}, "training_iteration": 125, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.693093061447144, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 150000, "timesteps_total": 150000, "custom_metrics": {}, "iterations_since_restore": 125, "episodes_this_iter": 26, "episode_reward_min": -96.53858566931319, "date": "2025-09-04_17-38-29", "episode_reward_max": -1.4890587415309486, "pid": 3651948, "timestamp": 1757000309, "episode_reward_mean": -83.63141672561046, "time_total_s": 5140.684392690659, "episodes_total": 3091, "episode_len_mean": 46.22}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 5180.632479429245, "info": {"sample_time_ms": 39617.571, "num_steps_trained": 151200, "grad_time_ms": 363.531, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 757.4061279296875, "policy_loss": -0.13331261277198792, "vf_explained_var": -0.2395211011171341, "entropy": 14.12633991241455, "cur_lr": 4.999999873689376e-05, "total_loss": 757.2942504882812, "kl": 0.014148239977657795}, "load_time_ms": 0.659, "num_steps_sampled": 151200, "update_time_ms": 2.538}, "training_iteration": 126, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.948086738586426, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 151200, "timesteps_total": 151200, "custom_metrics": {}, "iterations_since_restore": 126, "episodes_this_iter": 26, "episode_reward_min": -96.40172250854067, "date": "2025-09-04_17-39-09", "episode_reward_max": -1.4890587415309486, "pid": 3651948, "timestamp": 1757000349, "episode_reward_mean": -83.57069733948921, "time_total_s": 5180.632479429245, "episodes_total": 3117, "episode_len_mean": 46.48}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 5220.544438838959, "info": {"sample_time_ms": 39629.25, "num_steps_trained": 152400, "grad_time_ms": 366.799, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 601.220458984375, "policy_loss": -0.13246841728687286, "vf_explained_var": -0.1447088122367859, "entropy": 14.11307430267334, "cur_lr": 4.999999873689376e-05, "total_loss": 601.1065673828125, "kl": 0.012203659862279892}, "load_time_ms": 0.661, "num_steps_sampled": 152400, "update_time_ms": 2.599}, "training_iteration": 127, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.911959409713745, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 152400, "timesteps_total": 152400, "custom_metrics": {}, "iterations_since_restore": 127, "episodes_this_iter": 28, "episode_reward_min": -96.33489906872681, "date": "2025-09-04_17-39-49", "episode_reward_max": 6.00000171303838, "pid": 3651948, "timestamp": 1757000389, "episode_reward_mean": -82.60059780921091, "time_total_s": 5220.544438838959, "episodes_total": 3145, "episode_len_mean": 46.14}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 5261.096935510635, "info": {"sample_time_ms": 39690.801, "num_steps_trained": 153600, "grad_time_ms": 368.482, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 506.0225830078125, "policy_loss": -0.12658780813217163, "vf_explained_var": -0.06294663995504379, "entropy": 14.06998062133789, "cur_lr": 4.999999873689376e-05, "total_loss": 505.9153747558594, "kl": 0.012763193808495998}, "load_time_ms": 0.666, "num_steps_sampled": 153600, "update_time_ms": 2.658}, "training_iteration": 128, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.552496671676636, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 153600, "timesteps_total": 153600, "custom_metrics": {}, "iterations_since_restore": 128, "episodes_this_iter": 25, "episode_reward_min": -96.33489906872681, "date": "2025-09-04_17-40-30", "episode_reward_max": 6.00000171303838, "pid": 3651948, "timestamp": 1757000430, "episode_reward_mean": -83.05137308127264, "time_total_s": 5261.096935510635, "episodes_total": 3170, "episode_len_mean": 46.31}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 5300.679358243942, "info": {"sample_time_ms": 39578.543, "num_steps_trained": 154800, "grad_time_ms": 370.609, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 431.7116394042969, "policy_loss": -0.15474864840507507, "vf_explained_var": -0.01031529251486063, "entropy": 14.282392501831055, "cur_lr": 4.999999873689376e-05, "total_loss": 431.57843017578125, "kl": 0.01416665967553854}, "load_time_ms": 0.681, "num_steps_sampled": 154800, "update_time_ms": 2.658}, "training_iteration": 129, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.582422733306885, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 154800, "timesteps_total": 154800, "custom_metrics": {}, "iterations_since_restore": 129, "episodes_this_iter": 24, "episode_reward_min": -95.68258263100707, "date": "2025-09-04_17-41-09", "episode_reward_max": 6.00000171303838, "pid": 3651948, "timestamp": 1757000469, "episode_reward_mean": -83.72035979343802, "time_total_s": 5300.679358243942, "episodes_total": 3194, "episode_len_mean": 46.6}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 5340.522296190262, "info": {"sample_time_ms": 39556.789, "num_steps_trained": 156000, "grad_time_ms": 371.925, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 437.48919677734375, "policy_loss": -0.15160608291625977, "vf_explained_var": 0.0015125274658203125, "entropy": 14.29353141784668, "cur_lr": 4.999999873689376e-05, "total_loss": 437.3584899902344, "kl": 0.0137290358543396}, "load_time_ms": 0.674, "num_steps_sampled": 156000, "update_time_ms": 2.657}, "training_iteration": 130, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.84293794631958, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 156000, "timesteps_total": 156000, "custom_metrics": {}, "iterations_since_restore": 130, "episodes_this_iter": 25, "episode_reward_min": -95.68258263100707, "date": "2025-09-04_17-41-49", "episode_reward_max": 6.00000171303838, "pid": 3651948, "timestamp": 1757000509, "episode_reward_mean": -84.13483640434106, "time_total_s": 5340.522296190262, "episodes_total": 3219, "episode_len_mean": 46.62}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 5380.822713375092, "info": {"sample_time_ms": 39598.8, "num_steps_trained": 157200, "grad_time_ms": 373.285, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 529.89306640625, "policy_loss": -0.13991203904151917, "vf_explained_var": 0.0049516428261995316, "entropy": 13.942055702209473, "cur_lr": 4.999999873689376e-05, "total_loss": 529.7734375, "kl": 0.013350359164178371}, "load_time_ms": 0.672, "num_steps_sampled": 157200, "update_time_ms": 2.644}, "training_iteration": 131, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.30041718482971, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 157200, "timesteps_total": 157200, "custom_metrics": {}, "iterations_since_restore": 131, "episodes_this_iter": 26, "episode_reward_min": -98.09153998826689, "date": "2025-09-04_17-42-30", "episode_reward_max": 0.0009420488181604014, "pid": 3651948, "timestamp": 1757000550, "episode_reward_mean": -85.1697245143577, "time_total_s": 5380.822713375092, "episodes_total": 3245, "episode_len_mean": 47.13}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 5421.157953977585, "info": {"sample_time_ms": 39650.571, "num_steps_trained": 158400, "grad_time_ms": 372.43, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 466.9972229003906, "policy_loss": -0.16029267013072968, "vf_explained_var": 0.00613213237375021, "entropy": 13.89816665649414, "cur_lr": 4.999999873689376e-05, "total_loss": 466.85931396484375, "kl": 0.014697965234518051}, "load_time_ms": 0.67, "num_steps_sampled": 158400, "update_time_ms": 2.623}, "training_iteration": 132, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.335240602493286, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 158400, "timesteps_total": 158400, "custom_metrics": {}, "iterations_since_restore": 132, "episodes_this_iter": 28, "episode_reward_min": -98.09153998826689, "date": "2025-09-04_17-43-10", "episode_reward_max": 0.0009420488181604014, "pid": 3651948, "timestamp": 1757000590, "episode_reward_mean": -83.4383318103255, "time_total_s": 5421.157953977585, "episodes_total": 3273, "episode_len_mean": 46.48}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 5461.025522947311, "info": {"sample_time_ms": 39664.748, "num_steps_trained": 159600, "grad_time_ms": 373.541, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 435.2093200683594, "policy_loss": -0.14789816737174988, "vf_explained_var": 0.018822822719812393, "entropy": 13.995210647583008, "cur_lr": 4.999999873689376e-05, "total_loss": 435.0834045410156, "kl": 0.014483905397355556}, "load_time_ms": 0.672, "num_steps_sampled": 159600, "update_time_ms": 2.6}, "training_iteration": 133, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.86756896972656, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 159600, "timesteps_total": 159600, "custom_metrics": {}, "iterations_since_restore": 133, "episodes_this_iter": 27, "episode_reward_min": -98.09153998826689, "date": "2025-09-04_17-43-50", "episode_reward_max": 0.0009420488181604014, "pid": 3651948, "timestamp": 1757000630, "episode_reward_mean": -80.48768962719447, "time_total_s": 5461.025522947311, "episodes_total": 3300, "episode_len_mean": 45.15}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 5502.38499712944, "info": {"sample_time_ms": 39756.354, "num_steps_trained": 160800, "grad_time_ms": 375.214, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 497.5989685058594, "policy_loss": -0.1495353728532791, "vf_explained_var": 0.03133540600538254, "entropy": 14.031764030456543, "cur_lr": 4.999999873689376e-05, "total_loss": 497.4719543457031, "kl": 0.014864559285342693}, "load_time_ms": 0.676, "num_steps_sampled": 160800, "update_time_ms": 2.575}, "training_iteration": 134, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 41.359474182128906, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 160800, "timesteps_total": 160800, "custom_metrics": {}, "iterations_since_restore": 134, "episodes_this_iter": 28, "episode_reward_min": -96.25872951173972, "date": "2025-09-04_17-44-31", "episode_reward_max": -5.1776342299954425, "pid": 3651948, "timestamp": 1757000671, "episode_reward_mean": -77.20826072261913, "time_total_s": 5502.38499712944, "episodes_total": 3328, "episode_len_mean": 43.67}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 5542.784331083298, "info": {"sample_time_ms": 39823.63, "num_steps_trained": 162000, "grad_time_ms": 378.518, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 494.53619384765625, "policy_loss": -0.15687929093837738, "vf_explained_var": 0.006207088474184275, "entropy": 13.837095260620117, "cur_lr": 4.999999873689376e-05, "total_loss": 494.3984375, "kl": 0.012575294822454453}, "load_time_ms": 0.688, "num_steps_sampled": 162000, "update_time_ms": 2.586}, "training_iteration": 135, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.39933395385742, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 162000, "timesteps_total": 162000, "custom_metrics": {}, "iterations_since_restore": 135, "episodes_this_iter": 30, "episode_reward_min": -96.70286184158135, "date": "2025-09-04_17-45-12", "episode_reward_max": 8.000000400002254, "pid": 3651948, "timestamp": 1757000712, "episode_reward_mean": -75.40999040364656, "time_total_s": 5542.784331083298, "episodes_total": 3358, "episode_len_mean": 42.73}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 5582.566870212555, "info": {"sample_time_ms": 39807.303, "num_steps_trained": 163200, "grad_time_ms": 378.286, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 429.1787109375, "policy_loss": -0.13850900530815125, "vf_explained_var": 0.004715243820101023, "entropy": 13.751395225524902, "cur_lr": 4.999999873689376e-05, "total_loss": 429.0587463378906, "kl": 0.01221616193652153}, "load_time_ms": 0.693, "num_steps_sampled": 163200, "update_time_ms": 2.603}, "training_iteration": 136, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.7825391292572, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 163200, "timesteps_total": 163200, "custom_metrics": {}, "iterations_since_restore": 136, "episodes_this_iter": 24, "episode_reward_min": -96.70286184158135, "date": "2025-09-04_17-45-51", "episode_reward_max": 8.000000400002254, "pid": 3651948, "timestamp": 1757000751, "episode_reward_mean": -77.38409159796784, "time_total_s": 5582.566870212555, "episodes_total": 3382, "episode_len_mean": 43.6}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 5622.550188064575, "info": {"sample_time_ms": 39815.366, "num_steps_trained": 164400, "grad_time_ms": 377.402, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 519.8641357421875, "policy_loss": -0.15484751760959625, "vf_explained_var": 9.56919466261752e-05, "entropy": 14.056158065795898, "cur_lr": 4.999999873689376e-05, "total_loss": 519.7298583984375, "kl": 0.013529930263757706}, "load_time_ms": 0.695, "num_steps_sampled": 164400, "update_time_ms": 2.568}, "training_iteration": 137, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.983317852020264, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 164400, "timesteps_total": 164400, "custom_metrics": {}, "iterations_since_restore": 137, "episodes_this_iter": 25, "episode_reward_min": -96.70286184158135, "date": "2025-09-04_17-46-31", "episode_reward_max": 8.000000400002254, "pid": 3651948, "timestamp": 1757000791, "episode_reward_mean": -78.37530479396784, "time_total_s": 5622.550188064575, "episodes_total": 3407, "episode_len_mean": 44.37}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 5662.401572704315, "info": {"sample_time_ms": 39744.196, "num_steps_trained": 165600, "grad_time_ms": 378.524, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 511.96624755859375, "policy_loss": -0.14721018075942993, "vf_explained_var": 0.022053804248571396, "entropy": 13.871037483215332, "cur_lr": 4.999999873689376e-05, "total_loss": 511.83843994140625, "kl": 0.012805236503481865}, "load_time_ms": 0.686, "num_steps_sampled": 165600, "update_time_ms": 2.52}, "training_iteration": 138, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.85138463973999, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 165600, "timesteps_total": 165600, "custom_metrics": {}, "iterations_since_restore": 138, "episodes_this_iter": 29, "episode_reward_min": -96.70286184158135, "date": "2025-09-04_17-47-11", "episode_reward_max": 6.000004628464221, "pid": 3651948, "timestamp": 1757000831, "episode_reward_mean": -80.62368275993127, "time_total_s": 5662.401572704315, "episodes_total": 3436, "episode_len_mean": 45.52}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 5702.67907166481, "info": {"sample_time_ms": 39815.884, "num_steps_trained": 166800, "grad_time_ms": 376.383, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 462.9251403808594, "policy_loss": -0.14543704688549042, "vf_explained_var": 0.013895895332098007, "entropy": 13.724479675292969, "cur_lr": 4.999999873689376e-05, "total_loss": 462.7996826171875, "kl": 0.013188743032515049}, "load_time_ms": 0.678, "num_steps_sampled": 166800, "update_time_ms": 2.513}, "training_iteration": 139, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.277498960494995, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 166800, "timesteps_total": 166800, "custom_metrics": {}, "iterations_since_restore": 139, "episodes_this_iter": 26, "episode_reward_min": -96.3706927890899, "date": "2025-09-04_17-47-51", "episode_reward_max": 6.000004628464221, "pid": 3651948, "timestamp": 1757000871, "episode_reward_mean": -82.57511411226803, "time_total_s": 5702.67907166481, "episodes_total": 3462, "episode_len_mean": 46.38}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 5742.838150262833, "info": {"sample_time_ms": 39847.8, "num_steps_trained": 168000, "grad_time_ms": 376.118, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 468.1904296875, "policy_loss": -0.12903155386447906, "vf_explained_var": 0.010596592910587788, "entropy": 13.66910457611084, "cur_lr": 4.999999873689376e-05, "total_loss": 468.0818786621094, "kl": 0.013512490317225456}, "load_time_ms": 0.673, "num_steps_sampled": 168000, "update_time_ms": 2.509}, "training_iteration": 140, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.15907859802246, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 168000, "timesteps_total": 168000, "custom_metrics": {}, "iterations_since_restore": 140, "episodes_this_iter": 25, "episode_reward_min": -96.3706927890899, "date": "2025-09-04_17-48-32", "episode_reward_max": 4.00041902346528, "pid": 3651948, "timestamp": 1757000912, "episode_reward_mean": -81.06491087767878, "time_total_s": 5742.838150262833, "episodes_total": 3487, "episode_len_mean": 45.78}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 5783.118670940399, "info": {"sample_time_ms": 39845.818, "num_steps_trained": 169200, "grad_time_ms": 376.048, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 519.5208129882812, "policy_loss": -0.14394643902778625, "vf_explained_var": 0.007588174659758806, "entropy": 13.70052433013916, "cur_lr": 4.999999873689376e-05, "total_loss": 519.397705078125, "kl": 0.01370695885270834}, "load_time_ms": 0.678, "num_steps_sampled": 169200, "update_time_ms": 2.517}, "training_iteration": 141, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.28052067756653, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 169200, "timesteps_total": 169200, "custom_metrics": {}, "iterations_since_restore": 141, "episodes_this_iter": 30, "episode_reward_min": -94.9174978999614, "date": "2025-09-04_17-49-12", "episode_reward_max": 4.00041902346528, "pid": 3651948, "timestamp": 1757000952, "episode_reward_mean": -78.30766103408874, "time_total_s": 5783.118670940399, "episodes_total": 3517, "episode_len_mean": 44.28}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 5823.950105428696, "info": {"sample_time_ms": 39894.033, "num_steps_trained": 170400, "grad_time_ms": 377.402, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 533.2041015625, "policy_loss": -0.15218709409236908, "vf_explained_var": 0.027479078620672226, "entropy": 14.154834747314453, "cur_lr": 4.999999873689376e-05, "total_loss": 533.073486328125, "kl": 0.014139831066131592}, "load_time_ms": 0.687, "num_steps_sampled": 170400, "update_time_ms": 2.548}, "training_iteration": 142, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.83143448829651, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 170400, "timesteps_total": 170400, "custom_metrics": {}, "iterations_since_restore": 142, "episodes_this_iter": 28, "episode_reward_min": -95.80786370762291, "date": "2025-09-04_17-49-53", "episode_reward_max": 2.000266023377246, "pid": 3651948, "timestamp": 1757000993, "episode_reward_mean": -75.77111625323947, "time_total_s": 5823.950105428696, "episodes_total": 3545, "episode_len_mean": 43.22}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 5864.083309173584, "info": {"sample_time_ms": 39920.658, "num_steps_trained": 171600, "grad_time_ms": 377.249, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 447.897216796875, "policy_loss": -0.14685262739658356, "vf_explained_var": 0.012203852646052837, "entropy": 13.77534008026123, "cur_lr": 4.999999873689376e-05, "total_loss": 447.7716979980469, "kl": 0.014064337126910686}, "load_time_ms": 0.698, "num_steps_sampled": 171600, "update_time_ms": 2.559}, "training_iteration": 143, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.133203744888306, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 171600, "timesteps_total": 171600, "custom_metrics": {}, "iterations_since_restore": 143, "episodes_this_iter": 28, "episode_reward_min": -95.80786370762291, "date": "2025-09-04_17-50-33", "episode_reward_max": 2.000266023377246, "pid": 3651948, "timestamp": 1757001033, "episode_reward_mean": -74.91504953685117, "time_total_s": 5864.083309173584, "episodes_total": 3573, "episode_len_mean": 42.82}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 5904.289658069611, "info": {"sample_time_ms": 39806.058, "num_steps_trained": 172800, "grad_time_ms": 376.511, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 439.82000732421875, "policy_loss": -0.15573416650295258, "vf_explained_var": 0.010575804859399796, "entropy": 13.51430892944336, "cur_lr": 4.999999873689376e-05, "total_loss": 439.6846008300781, "kl": 0.013372303918004036}, "load_time_ms": 0.689, "num_steps_sampled": 172800, "update_time_ms": 2.545}, "training_iteration": 144, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.20634889602661, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 172800, "timesteps_total": 172800, "custom_metrics": {}, "iterations_since_restore": 144, "episodes_this_iter": 26, "episode_reward_min": -96.95658790248578, "date": "2025-09-04_17-51-13", "episode_reward_max": 2.000266023377246, "pid": 3651948, "timestamp": 1757001073, "episode_reward_mean": -77.47728638204703, "time_total_s": 5904.289658069611, "episodes_total": 3599, "episode_len_mean": 43.86}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 5944.542104482651, "info": {"sample_time_ms": 39793.73, "num_steps_trained": 174000, "grad_time_ms": 374.167, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 489.4002685546875, "policy_loss": -0.15363189578056335, "vf_explained_var": 0.010992010124027729, "entropy": 13.861942291259766, "cur_lr": 4.999999873689376e-05, "total_loss": 489.2677307128906, "kl": 0.013880307786166668}, "load_time_ms": 0.683, "num_steps_sampled": 174000, "update_time_ms": 2.551}, "training_iteration": 145, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.25244641304016, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 174000, "timesteps_total": 174000, "custom_metrics": {}, "iterations_since_restore": 145, "episodes_this_iter": 30, "episode_reward_min": -96.95658790248578, "date": "2025-09-04_17-51-53", "episode_reward_max": 6.00050672631794, "pid": 3651948, "timestamp": 1757001113, "episode_reward_mean": -77.48228222365948, "time_total_s": 5944.542104482651, "episodes_total": 3629, "episode_len_mean": 43.74}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 5984.474422693253, "info": {"sample_time_ms": 39808.244, "num_steps_trained": 175200, "grad_time_ms": 374.642, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 483.8934326171875, "policy_loss": -0.16133642196655273, "vf_explained_var": 0.004873269237577915, "entropy": 13.626518249511719, "cur_lr": 4.999999873689376e-05, "total_loss": 483.7530517578125, "kl": 0.013784998096525669}, "load_time_ms": 0.681, "num_steps_sampled": 175200, "update_time_ms": 2.542}, "training_iteration": 146, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.93231821060181, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 175200, "timesteps_total": 175200, "custom_metrics": {}, "iterations_since_restore": 146, "episodes_this_iter": 25, "episode_reward_min": -96.95658790248578, "date": "2025-09-04_17-52-33", "episode_reward_max": 6.00050672631794, "pid": 3651948, "timestamp": 1757001153, "episode_reward_mean": -77.00879523596886, "time_total_s": 5984.474422693253, "episodes_total": 3654, "episode_len_mean": 43.39}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 6024.323261499405, "info": {"sample_time_ms": 39797.488, "num_steps_trained": 176400, "grad_time_ms": 371.987, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 489.1770324707031, "policy_loss": -0.1577053815126419, "vf_explained_var": 0.02425098419189453, "entropy": 13.7898530960083, "cur_lr": 4.999999873689376e-05, "total_loss": 489.03948974609375, "kl": 0.013261471875011921}, "load_time_ms": 0.676, "num_steps_sampled": 176400, "update_time_ms": 2.547}, "training_iteration": 147, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.848838806152344, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 176400, "timesteps_total": 176400, "custom_metrics": {}, "iterations_since_restore": 147, "episodes_this_iter": 28, "episode_reward_min": -96.95658790248578, "date": "2025-09-04_17-53-13", "episode_reward_max": 6.00050672631794, "pid": 3651948, "timestamp": 1757001193, "episode_reward_mean": -76.72782283086792, "time_total_s": 6024.323261499405, "episodes_total": 3682, "episode_len_mean": 43.29}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 6064.125794410706, "info": {"sample_time_ms": 39792.766, "num_steps_trained": 177600, "grad_time_ms": 371.849, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 526.368408203125, "policy_loss": -0.14639145135879517, "vf_explained_var": 0.020639657974243164, "entropy": 13.701428413391113, "cur_lr": 4.999999873689376e-05, "total_loss": 526.2421875, "kl": 0.013283911161124706}, "load_time_ms": 0.686, "num_steps_sampled": 177600, "update_time_ms": 2.554}, "training_iteration": 148, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 39.80253291130066, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 177600, "timesteps_total": 177600, "custom_metrics": {}, "iterations_since_restore": 148, "episodes_this_iter": 27, "episode_reward_min": -96.31788008641165, "date": "2025-09-04_17-53-53", "episode_reward_max": 6.00050672631794, "pid": 3651948, "timestamp": 1757001233, "episode_reward_mean": -77.02193331284515, "time_total_s": 6064.125794410706, "episodes_total": 3709, "episode_len_mean": 43.53}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 6104.521768093109, "info": {"sample_time_ms": 39804.917, "num_steps_trained": 178800, "grad_time_ms": 371.567, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 534.6456298828125, "policy_loss": -0.14344368875026703, "vf_explained_var": 0.032623257488012314, "entropy": 13.884628295898438, "cur_lr": 4.999999873689376e-05, "total_loss": 534.522705078125, "kl": 0.013491793535649776}, "load_time_ms": 0.68, "num_steps_sampled": 178800, "update_time_ms": 2.547}, "training_iteration": 149, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.395973682403564, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 178800, "timesteps_total": 178800, "custom_metrics": {}, "iterations_since_restore": 149, "episodes_this_iter": 27, "episode_reward_min": -96.31788008641165, "date": "2025-09-04_17-54-34", "episode_reward_max": 4.000905065352485, "pid": 3651948, "timestamp": 1757001274, "episode_reward_mean": -77.59475084319799, "time_total_s": 6104.521768093109, "episodes_total": 3736, "episode_len_mean": 44.08}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 6144.585580587387, "info": {"sample_time_ms": 39797.247, "num_steps_trained": 180000, "grad_time_ms": 369.718, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 589.8328857421875, "policy_loss": -0.15737102925777435, "vf_explained_var": 0.0029666093178093433, "entropy": 13.93885326385498, "cur_lr": 4.999999873689376e-05, "total_loss": 589.6962280273438, "kl": 0.013605907559394836}, "load_time_ms": 0.682, "num_steps_sampled": 180000, "update_time_ms": 2.553}, "training_iteration": 150, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.063812494277954, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 180000, "timesteps_total": 180000, "custom_metrics": {}, "iterations_since_restore": 150, "episodes_this_iter": 28, "episode_reward_min": -96.31788008641165, "date": "2025-09-04_17-55-14", "episode_reward_max": 4.000905065352485, "pid": 3651948, "timestamp": 1757001314, "episode_reward_mean": -75.6539729494931, "time_total_s": 6144.585580587387, "episodes_total": 3764, "episode_len_mean": 43.33}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 6185.331671714783, "info": {"sample_time_ms": 39844.811, "num_steps_trained": 181200, "grad_time_ms": 368.735, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 573.21142578125, "policy_loss": -0.1537049263715744, "vf_explained_var": 0.009622778743505478, "entropy": 13.354726791381836, "cur_lr": 4.999999873689376e-05, "total_loss": 573.0790405273438, "kl": 0.01405271515250206}, "load_time_ms": 0.673, "num_steps_sampled": 181200, "update_time_ms": 2.562}, "training_iteration": 151, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.74609112739563, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 181200, "timesteps_total": 181200, "custom_metrics": {}, "iterations_since_restore": 151, "episodes_this_iter": 33, "episode_reward_min": -96.06327662108316, "date": "2025-09-04_17-55-54", "episode_reward_max": 6.000005684032507, "pid": 3651948, "timestamp": 1757001354, "episode_reward_mean": -73.6287210840856, "time_total_s": 6185.331671714783, "episodes_total": 3797, "episode_len_mean": 42.29}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 6225.614793539047, "info": {"sample_time_ms": 39791.345, "num_steps_trained": 182400, "grad_time_ms": 367.327, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 563.124755859375, "policy_loss": -0.14891427755355835, "vf_explained_var": 0.00879173818975687, "entropy": 13.944217681884766, "cur_lr": 4.999999873689376e-05, "total_loss": 562.9971923828125, "kl": 0.014039833098649979}, "load_time_ms": 0.658, "num_steps_sampled": 182400, "update_time_ms": 2.626}, "training_iteration": 152, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 40.283121824264526, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 182400, "timesteps_total": 182400, "custom_metrics": {}, "iterations_since_restore": 152, "episodes_this_iter": 29, "episode_reward_min": -96.06327662108316, "date": "2025-09-04_17-56-35", "episode_reward_max": 6.000005684032507, "pid": 3651948, "timestamp": 1757001395, "episode_reward_mean": -69.42951983223737, "time_total_s": 6225.614793539047, "episodes_total": 3826, "episode_len_mean": 40.17}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 6263.704438686371, "info": {"sample_time_ms": 39589.201, "num_steps_trained": 183600, "grad_time_ms": 365.196, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 451.091796875, "policy_loss": -0.16401776671409607, "vf_explained_var": 0.008112185634672642, "entropy": 13.623714447021484, "cur_lr": 4.999999873689376e-05, "total_loss": 450.9493103027344, "kl": 0.014164643362164497}, "load_time_ms": 0.653, "num_steps_sampled": 183600, "update_time_ms": 2.607}, "training_iteration": 153, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 38.08964514732361, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 183600, "timesteps_total": 183600, "custom_metrics": {}, "iterations_since_restore": 153, "episodes_this_iter": 27, "episode_reward_min": -96.06327662108316, "date": "2025-09-04_17-57-13", "episode_reward_max": 6.000005684032507, "pid": 3651948, "timestamp": 1757001433, "episode_reward_mean": -72.39619258771732, "time_total_s": 6263.704438686371, "episodes_total": 3853, "episode_len_mean": 41.43}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 6297.63969874382, "info": {"sample_time_ms": 38960.78, "num_steps_trained": 184800, "grad_time_ms": 366.522, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 426.76007080078125, "policy_loss": -0.1499679982662201, "vf_explained_var": 0.019961846992373466, "entropy": 14.01634407043457, "cur_lr": 4.999999873689376e-05, "total_loss": 426.6302185058594, "kl": 0.01324660424143076}, "load_time_ms": 0.675, "num_steps_sampled": 184800, "update_time_ms": 2.571}, "training_iteration": 154, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.93526005744934, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 184800, "timesteps_total": 184800, "custom_metrics": {}, "iterations_since_restore": 154, "episodes_this_iter": 27, "episode_reward_min": -96.02294955272997, "date": "2025-09-04_17-57-47", "episode_reward_max": 6.000005684032507, "pid": 3651948, "timestamp": 1757001467, "episode_reward_mean": -73.46839023207444, "time_total_s": 6297.63969874382, "episodes_total": 3880, "episode_len_mean": 42.25}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 6330.8898866176605, "info": {"sample_time_ms": 38258.875, "num_steps_trained": 186000, "grad_time_ms": 368.158, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 463.51904296875, "policy_loss": -0.14390847086906433, "vf_explained_var": 0.0144983334466815, "entropy": 13.602646827697754, "cur_lr": 4.999999873689376e-05, "total_loss": 463.39495849609375, "kl": 0.013063447549939156}, "load_time_ms": 0.683, "num_steps_sampled": 186000, "update_time_ms": 2.576}, "training_iteration": 155, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.25018787384033, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 186000, "timesteps_total": 186000, "custom_metrics": {}, "iterations_since_restore": 155, "episodes_this_iter": 28, "episode_reward_min": -96.02294955272997, "date": "2025-09-04_17-58-20", "episode_reward_max": 6.000153967687247, "pid": 3651948, "timestamp": 1757001500, "episode_reward_mean": -74.1287173082405, "time_total_s": 6330.8898866176605, "episodes_total": 3908, "episode_len_mean": 42.73}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 6365.358276844025, "info": {"sample_time_ms": 37712.506, "num_steps_trained": 187200, "grad_time_ms": 368.12, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 447.3106384277344, "policy_loss": -0.1546049267053604, "vf_explained_var": 0.0173814557492733, "entropy": 13.743790626525879, "cur_lr": 4.999999873689376e-05, "total_loss": 447.1781311035156, "kl": 0.014562149532139301}, "load_time_ms": 0.677, "num_steps_sampled": 187200, "update_time_ms": 2.598}, "training_iteration": 156, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.468390226364136, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 187200, "timesteps_total": 187200, "custom_metrics": {}, "iterations_since_restore": 156, "episodes_this_iter": 29, "episode_reward_min": -96.02294955272997, "date": "2025-09-04_17-58-55", "episode_reward_max": 6.000153967687247, "pid": 3651948, "timestamp": 1757001535, "episode_reward_mean": -73.961188285357, "time_total_s": 6365.358276844025, "episodes_total": 3937, "episode_len_mean": 42.69}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 6398.991594314575, "info": {"sample_time_ms": 37090.133, "num_steps_trained": 188400, "grad_time_ms": 368.892, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 432.24102783203125, "policy_loss": -0.16239456832408905, "vf_explained_var": 0.02992052584886551, "entropy": 13.820674896240234, "cur_lr": 4.999999873689376e-05, "total_loss": 432.09716796875, "kl": 0.012198535725474358}, "load_time_ms": 0.68, "num_steps_sampled": 188400, "update_time_ms": 2.579}, "training_iteration": 157, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.63331747055054, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 188400, "timesteps_total": 188400, "custom_metrics": {}, "iterations_since_restore": 157, "episodes_this_iter": 28, "episode_reward_min": -94.76394572856147, "date": "2025-09-04_17-59-28", "episode_reward_max": 6.000153967687247, "pid": 3651948, "timestamp": 1757001568, "episode_reward_mean": -73.47760643679179, "time_total_s": 6398.991594314575, "episodes_total": 3965, "episode_len_mean": 42.53}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 6432.306711435318, "info": {"sample_time_ms": 36440.832, "num_steps_trained": 189600, "grad_time_ms": 369.403, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 460.80865478515625, "policy_loss": -0.15355131030082703, "vf_explained_var": 0.014817522838711739, "entropy": 13.547548294067383, "cur_lr": 4.999999873689376e-05, "total_loss": 460.6759948730469, "kl": 0.013771760277450085}, "load_time_ms": 0.688, "num_steps_sampled": 189600, "update_time_ms": 2.583}, "training_iteration": 158, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.3151171207428, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 189600, "timesteps_total": 189600, "custom_metrics": {}, "iterations_since_restore": 158, "episodes_this_iter": 27, "episode_reward_min": -94.76394572856147, "date": "2025-09-04_18-00-02", "episode_reward_max": 4.000566881068873, "pid": 3651948, "timestamp": 1757001602, "episode_reward_mean": -74.17728514692062, "time_total_s": 6432.306711435318, "episodes_total": 3992, "episode_len_mean": 42.67}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 6465.709766387939, "info": {"sample_time_ms": 35739.848, "num_steps_trained": 190800, "grad_time_ms": 371.059, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 503.6580810546875, "policy_loss": -0.15453101694583893, "vf_explained_var": 0.01870148628950119, "entropy": 13.329124450683594, "cur_lr": 4.999999873689376e-05, "total_loss": 503.5256042480469, "kl": 0.014533232897520065}, "load_time_ms": 0.693, "num_steps_sampled": 190800, "update_time_ms": 2.617}, "training_iteration": 159, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.40305495262146, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 190800, "timesteps_total": 190800, "custom_metrics": {}, "iterations_since_restore": 159, "episodes_this_iter": 31, "episode_reward_min": -95.1787811615368, "date": "2025-09-04_18-00-35", "episode_reward_max": 4.000566881068873, "pid": 3651948, "timestamp": 1757001635, "episode_reward_mean": -73.62973317615167, "time_total_s": 6465.709766387939, "episodes_total": 4023, "episode_len_mean": 42.36}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 6499.890940904617, "info": {"sample_time_ms": 35151.71, "num_steps_trained": 192000, "grad_time_ms": 370.927, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 504.2874450683594, "policy_loss": -0.16084225475788116, "vf_explained_var": 0.008933212608098984, "entropy": 13.373307228088379, "cur_lr": 4.999999873689376e-05, "total_loss": 504.1483154296875, "kl": 0.014286703430116177}, "load_time_ms": 0.698, "num_steps_sampled": 192000, "update_time_ms": 2.6}, "training_iteration": 160, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.181174516677856, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 192000, "timesteps_total": 192000, "custom_metrics": {}, "iterations_since_restore": 160, "episodes_this_iter": 28, "episode_reward_min": -95.1787811615368, "date": "2025-09-04_18-01-09", "episode_reward_max": 4.000566881068873, "pid": 3651948, "timestamp": 1757001669, "episode_reward_mean": -71.18900937259428, "time_total_s": 6499.890940904617, "episodes_total": 4051, "episode_len_mean": 41.31}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 6533.306238651276, "info": {"sample_time_ms": 34417.313, "num_steps_trained": 193200, "grad_time_ms": 372.291, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 443.4478759765625, "policy_loss": -0.14680147171020508, "vf_explained_var": 0.030300889164209366, "entropy": 13.579971313476562, "cur_lr": 4.999999873689376e-05, "total_loss": 443.3230285644531, "kl": 0.014476616866886616}, "load_time_ms": 0.704, "num_steps_sampled": 193200, "update_time_ms": 2.579}, "training_iteration": 161, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.415297746658325, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 193200, "timesteps_total": 193200, "custom_metrics": {}, "iterations_since_restore": 161, "episodes_this_iter": 29, "episode_reward_min": -95.1787811615368, "date": "2025-09-04_18-01-43", "episode_reward_max": 5.039762891774004, "pid": 3651948, "timestamp": 1757001703, "episode_reward_mean": -69.76854035672953, "time_total_s": 6533.306238651276, "episodes_total": 4080, "episode_len_mean": 40.67}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 6566.520789146423, "info": {"sample_time_ms": 33708.97, "num_steps_trained": 194400, "grad_time_ms": 373.746, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 498.4832458496094, "policy_loss": -0.15090115368366241, "vf_explained_var": 0.02782423608005047, "entropy": 13.286617279052734, "cur_lr": 4.999999873689376e-05, "total_loss": 498.3533020019531, "kl": 0.013792970217764378}, "load_time_ms": 0.731, "num_steps_sampled": 194400, "update_time_ms": 2.501}, "training_iteration": 162, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.214550495147705, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 194400, "timesteps_total": 194400, "custom_metrics": {}, "iterations_since_restore": 162, "episodes_this_iter": 27, "episode_reward_min": -95.11817286038946, "date": "2025-09-04_18-02-16", "episode_reward_max": 5.039762891774004, "pid": 3651948, "timestamp": 1757001736, "episode_reward_mean": -72.1282965410242, "time_total_s": 6566.520789146423, "episodes_total": 4107, "episode_len_mean": 41.9}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 6600.906383752823, "info": {"sample_time_ms": 33337.011, "num_steps_trained": 195600, "grad_time_ms": 375.233, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 498.46148681640625, "policy_loss": -0.15607160329818726, "vf_explained_var": 0.01455751433968544, "entropy": 13.587542533874512, "cur_lr": 4.999999873689376e-05, "total_loss": 498.3251953125, "kl": 0.012981893494725227}, "load_time_ms": 0.73, "num_steps_sampled": 195600, "update_time_ms": 2.558}, "training_iteration": 163, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.385594606399536, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 195600, "timesteps_total": 195600, "custom_metrics": {}, "iterations_since_restore": 163, "episodes_this_iter": 30, "episode_reward_min": -94.88398095419217, "date": "2025-09-04_18-02-50", "episode_reward_max": 5.039762891774004, "pid": 3651948, "timestamp": 1757001770, "episode_reward_mean": -72.87153451901149, "time_total_s": 6600.906383752823, "episodes_total": 4137, "episode_len_mean": 42.22}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 6634.513481616974, "info": {"sample_time_ms": 33304.768, "num_steps_trained": 196800, "grad_time_ms": 374.63, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 469.6560363769531, "policy_loss": -0.15397673845291138, "vf_explained_var": 0.018685288727283478, "entropy": 13.264029502868652, "cur_lr": 4.999999873689376e-05, "total_loss": 469.5223388671875, "kl": 0.013345572166144848}, "load_time_ms": 0.728, "num_steps_sampled": 196800, "update_time_ms": 2.584}, "training_iteration": 164, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.607097864151, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 196800, "timesteps_total": 196800, "custom_metrics": {}, "iterations_since_restore": 164, "episodes_this_iter": 27, "episode_reward_min": -96.75490613689337, "date": "2025-09-04_18-03-24", "episode_reward_max": 4.000333877647177, "pid": 3651948, "timestamp": 1757001804, "episode_reward_mean": -74.19180616935152, "time_total_s": 6634.513481616974, "episodes_total": 4164, "episode_len_mean": 42.63}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 6667.584945678711, "info": {"sample_time_ms": 33289.103, "num_steps_trained": 198000, "grad_time_ms": 372.48, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 494.9625549316406, "policy_loss": -0.15671184659004211, "vf_explained_var": 0.012010018341243267, "entropy": 13.140623092651367, "cur_lr": 4.999999873689376e-05, "total_loss": 494.8278503417969, "kl": 0.014488577842712402}, "load_time_ms": 0.71, "num_steps_sampled": 198000, "update_time_ms": 2.588}, "training_iteration": 165, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.07146406173706, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 198000, "timesteps_total": 198000, "custom_metrics": {}, "iterations_since_restore": 165, "episodes_this_iter": 28, "episode_reward_min": -96.75490613689337, "date": "2025-09-04_18-03-57", "episode_reward_max": 4.000333877647177, "pid": 3651948, "timestamp": 1757001837, "episode_reward_mean": -73.97899578124846, "time_total_s": 6667.584945678711, "episodes_total": 4192, "episode_len_mean": 42.41}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 6700.747930765152, "info": {"sample_time_ms": 33159.297, "num_steps_trained": 199200, "grad_time_ms": 371.752, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 479.74163818359375, "policy_loss": -0.16004471480846405, "vf_explained_var": 0.022694991901516914, "entropy": 13.23586368560791, "cur_lr": 4.999999873689376e-05, "total_loss": 479.6025390625, "kl": 0.01381033007055521}, "load_time_ms": 0.719, "num_steps_sampled": 199200, "update_time_ms": 2.576}, "training_iteration": 166, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.16298508644104, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 199200, "timesteps_total": 199200, "custom_metrics": {}, "iterations_since_restore": 166, "episodes_this_iter": 27, "episode_reward_min": -96.75490613689337, "date": "2025-09-04_18-04-30", "episode_reward_max": 4.000038700747987, "pid": 3651948, "timestamp": 1757001870, "episode_reward_mean": -74.60556796545686, "time_total_s": 6700.747930765152, "episodes_total": 4219, "episode_len_mean": 42.7}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 6734.210085391998, "info": {"sample_time_ms": 33141.034, "num_steps_trained": 200400, "grad_time_ms": 372.896, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 481.43072509765625, "policy_loss": -0.16376127302646637, "vf_explained_var": 0.02091793902218342, "entropy": 12.862247467041016, "cur_lr": 4.999999873689376e-05, "total_loss": 481.28900146484375, "kl": 0.014525890350341797}, "load_time_ms": 0.715, "num_steps_sampled": 200400, "update_time_ms": 2.6}, "training_iteration": 167, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.46215462684631, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 200400, "timesteps_total": 200400, "custom_metrics": {}, "iterations_since_restore": 167, "episodes_this_iter": 28, "episode_reward_min": -96.75490613689337, "date": "2025-09-04_18-05-04", "episode_reward_max": 4.000213607189957, "pid": 3651948, "timestamp": 1757001904, "episode_reward_mean": -75.69941167396468, "time_total_s": 6734.210085391998, "episodes_total": 4247, "episode_len_mean": 43.31}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 6768.151931285858, "info": {"sample_time_ms": 33205.106, "num_steps_trained": 201600, "grad_time_ms": 371.545, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 431.3326721191406, "policy_loss": -0.15331611037254333, "vf_explained_var": 0.0383436493575573, "entropy": 13.03227424621582, "cur_lr": 4.999999873689376e-05, "total_loss": 431.20166015625, "kl": 0.014662904664874077}, "load_time_ms": 0.701, "num_steps_sampled": 201600, "update_time_ms": 2.606}, "training_iteration": 168, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.94184589385986, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 201600, "timesteps_total": 201600, "custom_metrics": {}, "iterations_since_restore": 168, "episodes_this_iter": 28, "episode_reward_min": -96.37598652716197, "date": "2025-09-04_18-05-38", "episode_reward_max": 6.000130978520583, "pid": 3651948, "timestamp": 1757001938, "episode_reward_mean": -75.83625850182565, "time_total_s": 6768.151931285858, "episodes_total": 4275, "episode_len_mean": 43.39}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 6801.593436717987, "info": {"sample_time_ms": 33208.048, "num_steps_trained": 202800, "grad_time_ms": 372.423, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 445.61370849609375, "policy_loss": -0.15942586958408356, "vf_explained_var": 0.006676660850644112, "entropy": 13.269512176513672, "cur_lr": 4.999999873689376e-05, "total_loss": 445.47357177734375, "kl": 0.012679451145231724}, "load_time_ms": 0.715, "num_steps_sampled": 202800, "update_time_ms": 2.582}, "training_iteration": 169, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.441505432128906, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 202800, "timesteps_total": 202800, "custom_metrics": {}, "iterations_since_restore": 169, "episodes_this_iter": 29, "episode_reward_min": -96.63690140637001, "date": "2025-09-04_18-06-11", "episode_reward_max": 6.000130978520583, "pid": 3651948, "timestamp": 1757001971, "episode_reward_mean": -74.06123125540654, "time_total_s": 6801.593436717987, "episodes_total": 4304, "episode_len_mean": 42.65}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 6834.95436167717, "info": {"sample_time_ms": 33122.278, "num_steps_trained": 204000, "grad_time_ms": 376.1, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 462.88909912109375, "policy_loss": -0.14820978045463562, "vf_explained_var": 0.022156503051519394, "entropy": 12.96584415435791, "cur_lr": 4.999999873689376e-05, "total_loss": 462.76019287109375, "kl": 0.012699018232524395}, "load_time_ms": 0.736, "num_steps_sampled": 204000, "update_time_ms": 2.628}, "training_iteration": 170, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.36092495918274, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 204000, "timesteps_total": 204000, "custom_metrics": {}, "iterations_since_restore": 170, "episodes_this_iter": 26, "episode_reward_min": -96.63690140637001, "date": "2025-09-04_18-06-44", "episode_reward_max": 6.000130978520583, "pid": 3651948, "timestamp": 1757002004, "episode_reward_mean": -76.51463005896723, "time_total_s": 6834.95436167717, "episodes_total": 4330, "episode_len_mean": 43.76}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 6868.499571561813, "info": {"sample_time_ms": 33135.07, "num_steps_trained": 205200, "grad_time_ms": 376.271, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 477.64007568359375, "policy_loss": -0.14878112077713013, "vf_explained_var": 0.024289535358548164, "entropy": 13.268444061279297, "cur_lr": 4.999999873689376e-05, "total_loss": 477.5120544433594, "kl": 0.013624078594148159}, "load_time_ms": 0.738, "num_steps_sampled": 205200, "update_time_ms": 2.64}, "training_iteration": 171, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.545209884643555, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 205200, "timesteps_total": 205200, "custom_metrics": {}, "iterations_since_restore": 171, "episodes_this_iter": 32, "episode_reward_min": -96.63690140637001, "date": "2025-09-04_18-07-18", "episode_reward_max": 8.000069988583551, "pid": 3651948, "timestamp": 1757002038, "episode_reward_mean": -73.74854474290508, "time_total_s": 6868.499571561813, "episodes_total": 4362, "episode_len_mean": 42.39}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 6901.951131343842, "info": {"sample_time_ms": 33159.103, "num_steps_trained": 206400, "grad_time_ms": 376.037, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 530.1824951171875, "policy_loss": -0.16158553957939148, "vf_explained_var": 0.020052360370755196, "entropy": 13.253538131713867, "cur_lr": 4.999999873689376e-05, "total_loss": 530.0426025390625, "kl": 0.014295194298028946}, "load_time_ms": 0.717, "num_steps_sampled": 206400, "update_time_ms": 2.634}, "training_iteration": 172, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.4515597820282, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 206400, "timesteps_total": 206400, "custom_metrics": {}, "iterations_since_restore": 172, "episodes_this_iter": 32, "episode_reward_min": -95.13871117544228, "date": "2025-09-04_18-07-52", "episode_reward_max": 8.000069988583551, "pid": 3651948, "timestamp": 1757002072, "episode_reward_mean": -68.18772978706642, "time_total_s": 6901.951131343842, "episodes_total": 4394, "episode_len_mean": 39.92}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 6935.213408470154, "info": {"sample_time_ms": 33045.654, "num_steps_trained": 207600, "grad_time_ms": 377.213, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 471.45257568359375, "policy_loss": -0.16813132166862488, "vf_explained_var": 0.016370773315429688, "entropy": 13.099279403686523, "cur_lr": 4.999999873689376e-05, "total_loss": 471.30706787109375, "kl": 0.01488409098237753}, "load_time_ms": 0.714, "num_steps_sampled": 207600, "update_time_ms": 2.573}, "training_iteration": 173, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.262277126312256, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 207600, "timesteps_total": 207600, "custom_metrics": {}, "iterations_since_restore": 173, "episodes_this_iter": 29, "episode_reward_min": -95.06222590109232, "date": "2025-09-04_18-08-25", "episode_reward_max": 8.000069988583551, "pid": 3651948, "timestamp": 1757002105, "episode_reward_mean": -66.36500553024902, "time_total_s": 6935.213408470154, "episodes_total": 4423, "episode_len_mean": 39.06}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 6968.752385139465, "info": {"sample_time_ms": 33038.375, "num_steps_trained": 208800, "grad_time_ms": 377.726, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 489.8381042480469, "policy_loss": -0.14878961443901062, "vf_explained_var": 0.023384928703308105, "entropy": 13.308280944824219, "cur_lr": 4.999999873689376e-05, "total_loss": 489.7087707519531, "kl": 0.01280286256223917}, "load_time_ms": 0.711, "num_steps_sampled": 208800, "update_time_ms": 2.573}, "training_iteration": 174, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.53897666931152, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 208800, "timesteps_total": 208800, "custom_metrics": {}, "iterations_since_restore": 174, "episodes_this_iter": 30, "episode_reward_min": -93.55236840867342, "date": "2025-09-04_18-08-58", "episode_reward_max": 8.000109714939725, "pid": 3651948, "timestamp": 1757002138, "episode_reward_mean": -65.7865687842101, "time_total_s": 6968.752385139465, "episodes_total": 4453, "episode_len_mean": 38.8}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 7003.800618886948, "info": {"sample_time_ms": 33234.718, "num_steps_trained": 210000, "grad_time_ms": 379.053, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 441.1793518066406, "policy_loss": -0.14626182615756989, "vf_explained_var": 0.043350908905267715, "entropy": 13.320549964904785, "cur_lr": 4.999999873689376e-05, "total_loss": 441.05364990234375, "kl": 0.013550628907978535}, "load_time_ms": 0.718, "num_steps_sampled": 210000, "update_time_ms": 2.562}, "training_iteration": 175, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 35.0482337474823, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 210000, "timesteps_total": 210000, "custom_metrics": {}, "iterations_since_restore": 175, "episodes_this_iter": 31, "episode_reward_min": -93.82099905489598, "date": "2025-09-04_18-09-33", "episode_reward_max": 8.000109714939725, "pid": 3651948, "timestamp": 1757002173, "episode_reward_mean": -68.23131268739769, "time_total_s": 7003.800618886948, "episodes_total": 4484, "episode_len_mean": 40.02}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 7037.028426885605, "info": {"sample_time_ms": 33242.547, "num_steps_trained": 211200, "grad_time_ms": 377.756, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 464.3078308105469, "policy_loss": -0.15784205496311188, "vf_explained_var": 0.021135879680514336, "entropy": 13.235689163208008, "cur_lr": 4.999999873689376e-05, "total_loss": 464.1710205078125, "kl": 0.013846870511770248}, "load_time_ms": 0.724, "num_steps_sampled": 211200, "update_time_ms": 2.543}, "training_iteration": 176, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.22780799865723, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 211200, "timesteps_total": 211200, "custom_metrics": {}, "iterations_since_restore": 176, "episodes_this_iter": 27, "episode_reward_min": -93.82099905489598, "date": "2025-09-04_18-10-07", "episode_reward_max": 8.000109714939725, "pid": 3651948, "timestamp": 1757002207, "episode_reward_mean": -71.104299140994, "time_total_s": 7037.028426885605, "episodes_total": 4511, "episode_len_mean": 41.46}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 7070.750869989395, "info": {"sample_time_ms": 33268.921, "num_steps_trained": 212400, "grad_time_ms": 377.48, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 520.8988647460938, "policy_loss": -0.14671167731285095, "vf_explained_var": 0.02725188620388508, "entropy": 13.615344047546387, "cur_lr": 4.999999873689376e-05, "total_loss": 520.772705078125, "kl": 0.01349978893995285}, "load_time_ms": 0.724, "num_steps_sampled": 212400, "update_time_ms": 2.5}, "training_iteration": 177, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.72244310379028, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 212400, "timesteps_total": 212400, "custom_metrics": {}, "iterations_since_restore": 177, "episodes_this_iter": 30, "episode_reward_min": -94.34293914620837, "date": "2025-09-04_18-10-40", "episode_reward_max": 8.000109714939725, "pid": 3651948, "timestamp": 1757002240, "episode_reward_mean": -67.67515993134072, "time_total_s": 7070.750869989395, "episodes_total": 4541, "episode_len_mean": 40.06}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 7105.745890855789, "info": {"sample_time_ms": 33374.45, "num_steps_trained": 213600, "grad_time_ms": 377.206, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 487.3653869628906, "policy_loss": -0.1492527723312378, "vf_explained_var": 0.019449617713689804, "entropy": 13.086959838867188, "cur_lr": 4.999999873689376e-05, "total_loss": 487.2371520996094, "kl": 0.013814833015203476}, "load_time_ms": 0.728, "num_steps_sampled": 213600, "update_time_ms": 2.473}, "training_iteration": 178, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.99502086639404, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 213600, "timesteps_total": 213600, "custom_metrics": {}, "iterations_since_restore": 178, "episodes_this_iter": 29, "episode_reward_min": -94.34293914620837, "date": "2025-09-04_18-11-15", "episode_reward_max": 8.000000429594232, "pid": 3651948, "timestamp": 1757002275, "episode_reward_mean": -68.10825736939901, "time_total_s": 7105.745890855789, "episodes_total": 4570, "episode_len_mean": 40.38}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 7139.0679042339325, "info": {"sample_time_ms": 33363.25, "num_steps_trained": 214800, "grad_time_ms": 376.485, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 482.8249206542969, "policy_loss": -0.16203825175762177, "vf_explained_var": 0.027035892009735107, "entropy": 12.97227954864502, "cur_lr": 4.999999873689376e-05, "total_loss": 482.6842346191406, "kl": 0.014075911603868008}, "load_time_ms": 0.711, "num_steps_sampled": 214800, "update_time_ms": 2.498}, "training_iteration": 179, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.32201337814331, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 214800, "timesteps_total": 214800, "custom_metrics": {}, "iterations_since_restore": 179, "episodes_this_iter": 27, "episode_reward_min": -95.53219191490898, "date": "2025-09-04_18-11-49", "episode_reward_max": 6.00005790227189, "pid": 3651948, "timestamp": 1757002309, "episode_reward_mean": -71.26538427002218, "time_total_s": 7139.0679042339325, "episodes_total": 4597, "episode_len_mean": 41.74}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 7172.459059238434, "info": {"sample_time_ms": 33369.89, "num_steps_trained": 216000, "grad_time_ms": 372.917, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 475.3916015625, "policy_loss": -0.16252401471138, "vf_explained_var": 0.03283761069178581, "entropy": 12.606663703918457, "cur_lr": 4.999999873689376e-05, "total_loss": 475.2503356933594, "kl": 0.014003436081111431}, "load_time_ms": 0.684, "num_steps_sampled": 216000, "update_time_ms": 2.474}, "training_iteration": 180, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.39115500450134, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 216000, "timesteps_total": 216000, "custom_metrics": {}, "iterations_since_restore": 180, "episodes_this_iter": 29, "episode_reward_min": -95.53219191490898, "date": "2025-09-04_18-12-22", "episode_reward_max": 6.00005790227189, "pid": 3651948, "timestamp": 1757002342, "episode_reward_mean": -73.30657166827612, "time_total_s": 7172.459059238434, "episodes_total": 4626, "episode_len_mean": 42.73}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 7206.650817155838, "info": {"sample_time_ms": 33436.797, "num_steps_trained": 217200, "grad_time_ms": 370.619, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 506.8238525390625, "policy_loss": -0.1711304485797882, "vf_explained_var": 0.027346935123205185, "entropy": 12.956379890441895, "cur_lr": 4.999999873689376e-05, "total_loss": 506.6749572753906, "kl": 0.01460947748273611}, "load_time_ms": 0.685, "num_steps_sampled": 217200, "update_time_ms": 2.53}, "training_iteration": 181, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.191757917404175, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 217200, "timesteps_total": 217200, "custom_metrics": {}, "iterations_since_restore": 181, "episodes_this_iter": 31, "episode_reward_min": -95.53219191490898, "date": "2025-09-04_18-12-56", "episode_reward_max": 8.000000440074153, "pid": 3651948, "timestamp": 1757002376, "episode_reward_mean": -70.12293610759806, "time_total_s": 7206.650817155838, "episodes_total": 4657, "episode_len_mean": 41.11}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 7240.108816862106, "info": {"sample_time_ms": 33437.607, "num_steps_trained": 218400, "grad_time_ms": 370.449, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 408.64678955078125, "policy_loss": -0.15670305490493774, "vf_explained_var": 0.03384535014629364, "entropy": 13.208443641662598, "cur_lr": 4.999999873689376e-05, "total_loss": 408.5120544433594, "kl": 0.014430741779506207}, "load_time_ms": 0.684, "num_steps_sampled": 218400, "update_time_ms": 2.524}, "training_iteration": 182, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.45799970626831, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 218400, "timesteps_total": 218400, "custom_metrics": {}, "iterations_since_restore": 182, "episodes_this_iter": 27, "episode_reward_min": -94.45431820690045, "date": "2025-09-04_18-13-30", "episode_reward_max": 8.000000440074153, "pid": 3651948, "timestamp": 1757002410, "episode_reward_mean": -70.88982028946353, "time_total_s": 7240.108816862106, "episodes_total": 4684, "episode_len_mean": 41.66}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 7273.374994516373, "info": {"sample_time_ms": 33439.775, "num_steps_trained": 219600, "grad_time_ms": 368.62, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 541.272705078125, "policy_loss": -0.1531658172607422, "vf_explained_var": 0.011464131996035576, "entropy": 12.912820816040039, "cur_lr": 4.999999873689376e-05, "total_loss": 541.1414184570312, "kl": 0.014355059713125229}, "load_time_ms": 0.685, "num_steps_sampled": 219600, "update_time_ms": 2.562}, "training_iteration": 183, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.26617765426636, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 219600, "timesteps_total": 219600, "custom_metrics": {}, "iterations_since_restore": 183, "episodes_this_iter": 28, "episode_reward_min": -95.21932780078414, "date": "2025-09-04_18-14-03", "episode_reward_max": 8.000000440074153, "pid": 3651948, "timestamp": 1757002443, "episode_reward_mean": -71.28978875642596, "time_total_s": 7273.374994516373, "episodes_total": 4712, "episode_len_mean": 41.93}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 7307.477123260498, "info": {"sample_time_ms": 33497.021, "num_steps_trained": 220800, "grad_time_ms": 367.646, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 559.41015625, "policy_loss": -0.1447771191596985, "vf_explained_var": 0.022654525935649872, "entropy": 12.68217945098877, "cur_lr": 4.999999873689376e-05, "total_loss": 559.286865234375, "kl": 0.014179195277392864}, "load_time_ms": 0.669, "num_steps_sampled": 220800, "update_time_ms": 2.602}, "training_iteration": 184, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.102128744125366, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 220800, "timesteps_total": 220800, "custom_metrics": {}, "iterations_since_restore": 184, "episodes_this_iter": 32, "episode_reward_min": -95.21932780078414, "date": "2025-09-04_18-14-37", "episode_reward_max": 6.0000256872259685, "pid": 3651948, "timestamp": 1757002477, "episode_reward_mean": -68.85681651830801, "time_total_s": 7307.477123260498, "episodes_total": 4744, "episode_len_mean": 40.73}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 7341.242619752884, "info": {"sample_time_ms": 33368.626, "num_steps_trained": 222000, "grad_time_ms": 367.768, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 474.8887939453125, "policy_loss": -0.17067062854766846, "vf_explained_var": 0.029077045619487762, "entropy": 12.845396041870117, "cur_lr": 4.999999873689376e-05, "total_loss": 474.73822021484375, "kl": 0.013275043107569218}, "load_time_ms": 0.665, "num_steps_sampled": 222000, "update_time_ms": 2.562}, "training_iteration": 185, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.765496492385864, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 222000, "timesteps_total": 222000, "custom_metrics": {}, "iterations_since_restore": 185, "episodes_this_iter": 31, "episode_reward_min": -95.21932780078414, "date": "2025-09-04_18-15-11", "episode_reward_max": 6.0000256872259685, "pid": 3651948, "timestamp": 1757002511, "episode_reward_mean": -68.1395418292209, "time_total_s": 7341.242619752884, "episodes_total": 4775, "episode_len_mean": 40.19}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 7376.1856777668, "info": {"sample_time_ms": 33539.186, "num_steps_trained": 223200, "grad_time_ms": 368.711, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 506.6676940917969, "policy_loss": -0.16878332197666168, "vf_explained_var": 0.018831439316272736, "entropy": 12.581832885742188, "cur_lr": 4.999999873689376e-05, "total_loss": 506.5218200683594, "kl": 0.015099719166755676}, "load_time_ms": 0.655, "num_steps_sampled": 223200, "update_time_ms": 2.574}, "training_iteration": 186, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.943058013916016, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 223200, "timesteps_total": 223200, "custom_metrics": {}, "iterations_since_restore": 186, "episodes_this_iter": 27, "episode_reward_min": -95.83189376358192, "date": "2025-09-04_18-15-46", "episode_reward_max": 6.000001728989278, "pid": 3651948, "timestamp": 1757002546, "episode_reward_mean": -69.54346150398968, "time_total_s": 7376.1856777668, "episodes_total": 4802, "episode_len_mean": 40.66}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 7409.781413793564, "info": {"sample_time_ms": 33527.805, "num_steps_trained": 224400, "grad_time_ms": 367.409, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 461.66259765625, "policy_loss": -0.15208211541175842, "vf_explained_var": 0.024997631087899208, "entropy": 13.037174224853516, "cur_lr": 4.999999873689376e-05, "total_loss": 461.5320129394531, "kl": 0.014166755601763725}, "load_time_ms": 0.661, "num_steps_sampled": 224400, "update_time_ms": 2.574}, "training_iteration": 187, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.595736026763916, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 224400, "timesteps_total": 224400, "custom_metrics": {}, "iterations_since_restore": 187, "episodes_this_iter": 32, "episode_reward_min": -95.83189376358192, "date": "2025-09-04_18-16-20", "episode_reward_max": 2.0001916476801034, "pid": 3651948, "timestamp": 1757002580, "episode_reward_mean": -70.26994574194738, "time_total_s": 7409.781413793564, "episodes_total": 4834, "episode_len_mean": 40.89}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 7443.122005939484, "info": {"sample_time_ms": 33364.201, "num_steps_trained": 225600, "grad_time_ms": 365.665, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 431.95404052734375, "policy_loss": -0.16554684937000275, "vf_explained_var": 0.03322778642177582, "entropy": 12.807843208312988, "cur_lr": 4.999999873689376e-05, "total_loss": 431.8093566894531, "kl": 0.013717424124479294}, "load_time_ms": 0.659, "num_steps_sampled": 225600, "update_time_ms": 2.571}, "training_iteration": 188, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.3405921459198, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 225600, "timesteps_total": 225600, "custom_metrics": {}, "iterations_since_restore": 188, "episodes_this_iter": 26, "episode_reward_min": -95.83189376358192, "date": "2025-09-04_18-16-53", "episode_reward_max": 1.7789538112237562, "pid": 3651948, "timestamp": 1757002613, "episode_reward_mean": -70.59327772699594, "time_total_s": 7443.122005939484, "episodes_total": 4860, "episode_len_mean": 41.15}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 7476.492438316345, "info": {"sample_time_ms": 33368.417, "num_steps_trained": 226800, "grad_time_ms": 366.289, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 455.0348205566406, "policy_loss": -0.15835967659950256, "vf_explained_var": 0.03563562408089638, "entropy": 12.52796745300293, "cur_lr": 4.999999873689376e-05, "total_loss": 454.89776611328125, "kl": 0.014046341180801392}, "load_time_ms": 0.663, "num_steps_sampled": 226800, "update_time_ms": 2.535}, "training_iteration": 189, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.37043237686157, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 226800, "timesteps_total": 226800, "custom_metrics": {}, "iterations_since_restore": 189, "episodes_this_iter": 28, "episode_reward_min": -95.40900359037315, "date": "2025-09-04_18-17-26", "episode_reward_max": 1.7789538112237562, "pid": 3651948, "timestamp": 1757002646, "episode_reward_mean": -72.52180366277011, "time_total_s": 7476.492438316345, "episodes_total": 4888, "episode_len_mean": 42.0}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 7509.6339473724365, "info": {"sample_time_ms": 33340.973, "num_steps_trained": 228000, "grad_time_ms": 368.748, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 447.1451721191406, "policy_loss": -0.1697927862405777, "vf_explained_var": 0.04534539952874184, "entropy": 12.65049934387207, "cur_lr": 4.999999873689376e-05, "total_loss": 446.9970397949219, "kl": 0.014240365475416183}, "load_time_ms": 0.672, "num_steps_sampled": 228000, "update_time_ms": 2.524}, "training_iteration": 190, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.14150905609131, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 228000, "timesteps_total": 228000, "custom_metrics": {}, "iterations_since_restore": 190, "episodes_this_iter": 29, "episode_reward_min": -93.99593714288171, "date": "2025-09-04_18-18-00", "episode_reward_max": 4.000255539698237, "pid": 3651948, "timestamp": 1757002680, "episode_reward_mean": -73.6454396869818, "time_total_s": 7509.6339473724365, "episodes_total": 4917, "episode_len_mean": 42.72}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 7543.576703071594, "info": {"sample_time_ms": 33313.244, "num_steps_trained": 229200, "grad_time_ms": 371.637, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 435.88616943359375, "policy_loss": -0.16152003407478333, "vf_explained_var": 0.028656788170337677, "entropy": 12.934611320495605, "cur_lr": 4.999999873689376e-05, "total_loss": 435.74530029296875, "kl": 0.013617919757962227}, "load_time_ms": 0.675, "num_steps_sampled": 229200, "update_time_ms": 2.463}, "training_iteration": 191, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.942755699157715, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 229200, "timesteps_total": 229200, "custom_metrics": {}, "iterations_since_restore": 191, "episodes_this_iter": 28, "episode_reward_min": -93.99593714288171, "date": "2025-09-04_18-18-34", "episode_reward_max": 4.000257012599587, "pid": 3651948, "timestamp": 1757002714, "episode_reward_mean": -73.94141473983446, "time_total_s": 7543.576703071594, "episodes_total": 4945, "episode_len_mean": 43.03}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 7578.093836784363, "info": {"sample_time_ms": 33416.772, "num_steps_trained": 230400, "grad_time_ms": 373.977, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 457.99609375, "policy_loss": -0.16294731199741364, "vf_explained_var": 0.02682061307132244, "entropy": 13.026744842529297, "cur_lr": 4.999999873689376e-05, "total_loss": 457.8548278808594, "kl": 0.01424330659210682}, "load_time_ms": 0.682, "num_steps_sampled": 230400, "update_time_ms": 2.478}, "training_iteration": 192, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.517133712768555, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 230400, "timesteps_total": 230400, "custom_metrics": {}, "iterations_since_restore": 192, "episodes_this_iter": 27, "episode_reward_min": -95.5472888620737, "date": "2025-09-04_18-19-08", "episode_reward_max": 4.000257012599587, "pid": 3651948, "timestamp": 1757002748, "episode_reward_mean": -73.16517307924585, "time_total_s": 7578.093836784363, "episodes_total": 4972, "episode_len_mean": 42.85}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 7611.424062490463, "info": {"sample_time_ms": 33420.426, "num_steps_trained": 231600, "grad_time_ms": 376.692, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 509.8204345703125, "policy_loss": -0.16708210110664368, "vf_explained_var": 0.019589563831686974, "entropy": 12.410161972045898, "cur_lr": 4.999999873689376e-05, "total_loss": 509.67681884765625, "kl": 0.015443297103047371}, "load_time_ms": 0.701, "num_steps_sampled": 231600, "update_time_ms": 2.48}, "training_iteration": 193, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.330225706100464, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 231600, "timesteps_total": 231600, "custom_metrics": {}, "iterations_since_restore": 193, "episodes_this_iter": 29, "episode_reward_min": -95.5472888620737, "date": "2025-09-04_18-19-41", "episode_reward_max": 4.000257012599587, "pid": 3651948, "timestamp": 1757002781, "episode_reward_mean": -72.8662915684726, "time_total_s": 7611.424062490463, "episodes_total": 5001, "episode_len_mean": 42.7}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 7644.909424304962, "info": {"sample_time_ms": 33358.604, "num_steps_trained": 232800, "grad_time_ms": 376.924, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 483.5562744140625, "policy_loss": -0.15306442975997925, "vf_explained_var": 0.02964412420988083, "entropy": 12.852642059326172, "cur_lr": 4.999999873689376e-05, "total_loss": 483.4244384765625, "kl": 0.013986443169414997}, "load_time_ms": 0.712, "num_steps_sampled": 232800, "update_time_ms": 2.419}, "training_iteration": 194, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.4853618144989, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 232800, "timesteps_total": 232800, "custom_metrics": {}, "iterations_since_restore": 194, "episodes_this_iter": 24, "episode_reward_min": -95.5472888620737, "date": "2025-09-04_18-20-15", "episode_reward_max": 4.000017886379702, "pid": 3651948, "timestamp": 1757002815, "episode_reward_mean": -76.33770108549413, "time_total_s": 7644.909424304962, "episodes_total": 5025, "episode_len_mean": 44.22}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 7679.199877500534, "info": {"sample_time_ms": 33411.757, "num_steps_trained": 234000, "grad_time_ms": 376.213, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 506.1232604980469, "policy_loss": -0.1687338948249817, "vf_explained_var": 0.03247503936290741, "entropy": 12.751094818115234, "cur_lr": 4.999999873689376e-05, "total_loss": 505.9776916503906, "kl": 0.015258345752954483}, "load_time_ms": 0.708, "num_steps_sampled": 234000, "update_time_ms": 2.455}, "training_iteration": 195, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.2904531955719, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 234000, "timesteps_total": 234000, "custom_metrics": {}, "iterations_since_restore": 195, "episodes_this_iter": 31, "episode_reward_min": -96.21196756648438, "date": "2025-09-04_18-20-49", "episode_reward_max": 4.000017886379702, "pid": 3651948, "timestamp": 1757002849, "episode_reward_mean": -74.40327719050362, "time_total_s": 7679.199877500534, "episodes_total": 5056, "episode_len_mean": 43.19}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 7713.31763625145, "info": {"sample_time_ms": 33330.199, "num_steps_trained": 235200, "grad_time_ms": 375.18, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 442.1346435546875, "policy_loss": -0.16684409976005554, "vf_explained_var": 0.031153075397014618, "entropy": 12.824676513671875, "cur_lr": 4.999999873689376e-05, "total_loss": 441.98858642578125, "kl": 0.013674840331077576}, "load_time_ms": 0.716, "num_steps_sampled": 235200, "update_time_ms": 2.505}, "training_iteration": 196, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.11775875091553, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 235200, "timesteps_total": 235200, "custom_metrics": {}, "iterations_since_restore": 196, "episodes_this_iter": 30, "episode_reward_min": -96.21196756648438, "date": "2025-09-04_18-21-23", "episode_reward_max": 6.002070167660171, "pid": 3651948, "timestamp": 1757002883, "episode_reward_mean": -72.27055099438103, "time_total_s": 7713.31763625145, "episodes_total": 5086, "episode_len_mean": 42.12}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 7746.8098311424255, "info": {"sample_time_ms": 33318.688, "num_steps_trained": 236400, "grad_time_ms": 376.25, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 504.9436340332031, "policy_loss": -0.1752660572528839, "vf_explained_var": 0.052433982491493225, "entropy": 12.876564979553223, "cur_lr": 4.999999873689376e-05, "total_loss": 504.7928161621094, "kl": 0.01613185554742813}, "load_time_ms": 0.715, "num_steps_sampled": 236400, "update_time_ms": 2.528}, "training_iteration": 197, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.49219489097595, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 236400, "timesteps_total": 236400, "custom_metrics": {}, "iterations_since_restore": 197, "episodes_this_iter": 34, "episode_reward_min": -96.21196756648438, "date": "2025-09-04_18-21-57", "episode_reward_max": 8.000242692043646, "pid": 3651948, "timestamp": 1757002917, "episode_reward_mean": -65.35815674165814, "time_total_s": 7746.8098311424255, "episodes_total": 5120, "episode_len_mean": 39.21}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 7780.365607500076, "info": {"sample_time_ms": 33339.802, "num_steps_trained": 237600, "grad_time_ms": 376.558, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 416.38665771484375, "policy_loss": -0.1583482027053833, "vf_explained_var": 0.008243918418884277, "entropy": 13.000116348266602, "cur_lr": 4.999999873689376e-05, "total_loss": 416.25006103515625, "kl": 0.01429493073374033}, "load_time_ms": 0.714, "num_steps_sampled": 237600, "update_time_ms": 2.613}, "training_iteration": 198, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.55577635765076, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 237600, "timesteps_total": 237600, "custom_metrics": {}, "iterations_since_restore": 198, "episodes_this_iter": 25, "episode_reward_min": -96.21196756648438, "date": "2025-09-04_18-22-30", "episode_reward_max": 8.000242692043646, "pid": 3651948, "timestamp": 1757002950, "episode_reward_mean": -68.68605460087782, "time_total_s": 7780.365607500076, "episodes_total": 5145, "episode_len_mean": 41.06}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 7813.889029741287, "info": {"sample_time_ms": 33355.593, "num_steps_trained": 238800, "grad_time_ms": 376.104, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 456.09765625, "policy_loss": -0.15889257192611694, "vf_explained_var": 0.02178768254816532, "entropy": 12.65239429473877, "cur_lr": 4.999999873689376e-05, "total_loss": 455.9606628417969, "kl": 0.014413093216717243}, "load_time_ms": 0.714, "num_steps_sampled": 238800, "update_time_ms": 2.603}, "training_iteration": 199, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.52342224121094, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 238800, "timesteps_total": 238800, "custom_metrics": {}, "iterations_since_restore": 199, "episodes_this_iter": 27, "episode_reward_min": -95.60804949833211, "date": "2025-09-04_18-23-04", "episode_reward_max": 8.000242692043646, "pid": 3651948, "timestamp": 1757002984, "episode_reward_mean": -70.32519611813332, "time_total_s": 7813.889029741287, "episodes_total": 5172, "episode_len_mean": 41.79}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 7846.953207492828, "info": {"sample_time_ms": 33347.681, "num_steps_trained": 240000, "grad_time_ms": 376.3, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 485.8211669921875, "policy_loss": -0.1690763682126999, "vf_explained_var": 0.02998235449194908, "entropy": 12.54977798461914, "cur_lr": 4.999999873689376e-05, "total_loss": 485.6736755371094, "kl": 0.014191006310284138}, "load_time_ms": 0.703, "num_steps_sampled": 240000, "update_time_ms": 2.589}, "training_iteration": 200, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.06417775154114, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 240000, "timesteps_total": 240000, "custom_metrics": {}, "iterations_since_restore": 200, "episodes_this_iter": 30, "episode_reward_min": -95.60804949833211, "date": "2025-09-04_18-23-37", "episode_reward_max": 1.0952821156691535, "pid": 3651948, "timestamp": 1757003017, "episode_reward_mean": -69.21935034442157, "time_total_s": 7846.953207492828, "episodes_total": 5202, "episode_len_mean": 41.4}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 7881.645069122314, "info": {"sample_time_ms": 33423.48, "num_steps_trained": 241200, "grad_time_ms": 375.39, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 438.78692626953125, "policy_loss": -0.16150593757629395, "vf_explained_var": 0.02985469438135624, "entropy": 12.441953659057617, "cur_lr": 4.999999873689376e-05, "total_loss": 438.64642333984375, "kl": 0.013840895146131516}, "load_time_ms": 0.694, "num_steps_sampled": 241200, "update_time_ms": 2.612}, "training_iteration": 201, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.691861629486084, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 241200, "timesteps_total": 241200, "custom_metrics": {}, "iterations_since_restore": 201, "episodes_this_iter": 28, "episode_reward_min": -95.60804949833211, "date": "2025-09-04_18-24-12", "episode_reward_max": 0.0010491070470486363, "pid": 3651948, "timestamp": 1757003052, "episode_reward_mean": -72.67710039707534, "time_total_s": 7881.645069122314, "episodes_total": 5230, "episode_len_mean": 43.11}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 7915.277950763702, "info": {"sample_time_ms": 33336.976, "num_steps_trained": 242400, "grad_time_ms": 373.51, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 528.7056274414062, "policy_loss": -0.164224773645401, "vf_explained_var": 0.013893438503146172, "entropy": 12.260029792785645, "cur_lr": 4.999999873689376e-05, "total_loss": 528.563232421875, "kl": 0.014393393881618977}, "load_time_ms": 0.692, "num_steps_sampled": 242400, "update_time_ms": 2.605}, "training_iteration": 202, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.63288164138794, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 242400, "timesteps_total": 242400, "custom_metrics": {}, "iterations_since_restore": 202, "episodes_this_iter": 34, "episode_reward_min": -93.56909818892126, "date": "2025-09-04_18-24-46", "episode_reward_max": 8.000011402220146, "pid": 3651948, "timestamp": 1757003086, "episode_reward_mean": -65.56545058732408, "time_total_s": 7915.277950763702, "episodes_total": 5264, "episode_len_mean": 39.49}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 7948.64150595665, "info": {"sample_time_ms": 33341.681, "num_steps_trained": 243600, "grad_time_ms": 372.193, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 504.7474365234375, "policy_loss": -0.14668802917003632, "vf_explained_var": 0.030628588050603867, "entropy": 12.595661163330078, "cur_lr": 4.999999873689376e-05, "total_loss": 504.6220703125, "kl": 0.014047231525182724}, "load_time_ms": 0.673, "num_steps_sampled": 243600, "update_time_ms": 2.623}, "training_iteration": 203, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.36355519294739, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 243600, "timesteps_total": 243600, "custom_metrics": {}, "iterations_since_restore": 203, "episodes_this_iter": 29, "episode_reward_min": -93.56909818892126, "date": "2025-09-04_18-25-19", "episode_reward_max": 8.000011402220146, "pid": 3651948, "timestamp": 1757003119, "episode_reward_mean": -65.31403438116699, "time_total_s": 7948.64150595665, "episodes_total": 5293, "episode_len_mean": 39.22}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 7982.057063341141, "info": {"sample_time_ms": 33334.772, "num_steps_trained": 244800, "grad_time_ms": 372.076, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 450.298583984375, "policy_loss": -0.16483943164348602, "vf_explained_var": 0.06417058408260345, "entropy": 12.538618087768555, "cur_lr": 4.999999873689376e-05, "total_loss": 450.15496826171875, "kl": 0.013958992436528206}, "load_time_ms": 0.67, "num_steps_sampled": 244800, "update_time_ms": 2.644}, "training_iteration": 204, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.41555738449097, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 244800, "timesteps_total": 244800, "custom_metrics": {}, "iterations_since_restore": 204, "episodes_this_iter": 33, "episode_reward_min": -93.19348353972086, "date": "2025-09-04_18-25-52", "episode_reward_max": 8.000032462470926, "pid": 3651948, "timestamp": 1757003152, "episode_reward_mean": -63.282461117190714, "time_total_s": 7982.057063341141, "episodes_total": 5326, "episode_len_mean": 38.18}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 8015.771010875702, "info": {"sample_time_ms": 33278.228, "num_steps_trained": 246000, "grad_time_ms": 371.012, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 446.9205322265625, "policy_loss": -0.15768620371818542, "vf_explained_var": 0.036962032318115234, "entropy": 12.585735321044922, "cur_lr": 4.999999873689376e-05, "total_loss": 446.7854309082031, "kl": 0.01488898042589426}, "load_time_ms": 0.671, "num_steps_sampled": 246000, "update_time_ms": 2.653}, "training_iteration": 205, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.71394753456116, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 246000, "timesteps_total": 246000, "custom_metrics": {}, "iterations_since_restore": 205, "episodes_this_iter": 28, "episode_reward_min": -93.63480907981032, "date": "2025-09-04_18-26-26", "episode_reward_max": 8.000032462470926, "pid": 3651948, "timestamp": 1757003186, "episode_reward_mean": -65.35059535319327, "time_total_s": 8015.771010875702, "episodes_total": 5354, "episode_len_mean": 39.43}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 8049.1763389110565, "info": {"sample_time_ms": 33207.42, "num_steps_trained": 247200, "grad_time_ms": 370.664, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 475.2734375, "policy_loss": -0.15967623889446259, "vf_explained_var": 0.033230237662792206, "entropy": 12.491169929504395, "cur_lr": 4.999999873689376e-05, "total_loss": 475.1365966796875, "kl": 0.015001079998910427}, "load_time_ms": 0.659, "num_steps_sampled": 247200, "update_time_ms": 2.602}, "training_iteration": 206, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.405328035354614, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 247200, "timesteps_total": 247200, "custom_metrics": {}, "iterations_since_restore": 206, "episodes_this_iter": 29, "episode_reward_min": -95.0799406703004, "date": "2025-09-04_18-26-59", "episode_reward_max": 8.000032462470926, "pid": 3651948, "timestamp": 1757003219, "episode_reward_mean": -67.66203927731692, "time_total_s": 8049.1763389110565, "episodes_total": 5383, "episode_len_mean": 40.71}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 8083.296813249588, "info": {"sample_time_ms": 33269.992, "num_steps_trained": 248400, "grad_time_ms": 370.995, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 446.5125427246094, "policy_loss": -0.17522785067558289, "vf_explained_var": 0.03150990977883339, "entropy": 12.790533065795898, "cur_lr": 4.999999873689376e-05, "total_loss": 446.3612060546875, "kl": 0.015740180388092995}, "load_time_ms": 0.67, "num_steps_sampled": 248400, "update_time_ms": 2.575}, "training_iteration": 207, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.120474338531494, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 248400, "timesteps_total": 248400, "custom_metrics": {}, "iterations_since_restore": 207, "episodes_this_iter": 29, "episode_reward_min": -95.0799406703004, "date": "2025-09-04_18-27-34", "episode_reward_max": 8.000032462470926, "pid": 3651948, "timestamp": 1757003254, "episode_reward_mean": -68.76872612698419, "time_total_s": 8083.296813249588, "episodes_total": 5412, "episode_len_mean": 41.03}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 8116.955354452133, "info": {"sample_time_ms": 33280.061, "num_steps_trained": 249600, "grad_time_ms": 371.251, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 547.6825561523438, "policy_loss": -0.1566251516342163, "vf_explained_var": 0.02517450600862503, "entropy": 12.358968734741211, "cur_lr": 4.999999873689376e-05, "total_loss": 547.5484008789062, "kl": 0.014785553328692913}, "load_time_ms": 0.675, "num_steps_sampled": 249600, "update_time_ms": 2.515}, "training_iteration": 208, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.658541202545166, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 249600, "timesteps_total": 249600, "custom_metrics": {}, "iterations_since_restore": 208, "episodes_this_iter": 29, "episode_reward_min": -95.0799406703004, "date": "2025-09-04_18-28-07", "episode_reward_max": 4.000291038650434, "pid": 3651948, "timestamp": 1757003287, "episode_reward_mean": -68.98315664091089, "time_total_s": 8116.955354452133, "episodes_total": 5441, "episode_len_mean": 41.1}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 8151.842911720276, "info": {"sample_time_ms": 33417.949, "num_steps_trained": 250800, "grad_time_ms": 369.804, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 447.9426574707031, "policy_loss": -0.16055484116077423, "vf_explained_var": 0.016253961250185966, "entropy": 12.735525131225586, "cur_lr": 4.999999873689376e-05, "total_loss": 447.80487060546875, "kl": 0.015008926391601562}, "load_time_ms": 0.671, "num_steps_sampled": 250800, "update_time_ms": 2.51}, "training_iteration": 209, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.8875572681427, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 250800, "timesteps_total": 250800, "custom_metrics": {}, "iterations_since_restore": 209, "episodes_this_iter": 28, "episode_reward_min": -93.77040153773555, "date": "2025-09-04_18-28-42", "episode_reward_max": 4.000291038650434, "pid": 3651948, "timestamp": 1757003322, "episode_reward_mean": -70.58435784835198, "time_total_s": 8151.842911720276, "episodes_total": 5469, "episode_len_mean": 41.72}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 8185.894082307816, "info": {"sample_time_ms": 33518.732, "num_steps_trained": 252000, "grad_time_ms": 367.734, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 510.38629150390625, "policy_loss": -0.16351114213466644, "vf_explained_var": 0.025241592898964882, "entropy": 12.427091598510742, "cur_lr": 4.999999873689376e-05, "total_loss": 510.2453308105469, "kl": 0.014884104020893574}, "load_time_ms": 0.687, "num_steps_sampled": 252000, "update_time_ms": 2.513}, "training_iteration": 210, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.05117058753967, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 252000, "timesteps_total": 252000, "custom_metrics": {}, "iterations_since_restore": 210, "episodes_this_iter": 33, "episode_reward_min": -93.77040153773555, "date": "2025-09-04_18-29-16", "episode_reward_max": 6.000473203830543, "pid": 3651948, "timestamp": 1757003356, "episode_reward_mean": -68.06821898003427, "time_total_s": 8185.894082307816, "episodes_total": 5502, "episode_len_mean": 40.71}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 8219.247455835342, "info": {"sample_time_ms": 33386.554, "num_steps_trained": 253200, "grad_time_ms": 366.039, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 449.7999267578125, "policy_loss": -0.1619112491607666, "vf_explained_var": 0.03780033811926842, "entropy": 12.094733238220215, "cur_lr": 4.999999873689376e-05, "total_loss": 449.66009521484375, "kl": 0.014554371125996113}, "load_time_ms": 0.687, "num_steps_sampled": 253200, "update_time_ms": 2.46}, "training_iteration": 211, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.353373527526855, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 253200, "timesteps_total": 253200, "custom_metrics": {}, "iterations_since_restore": 211, "episodes_this_iter": 32, "episode_reward_min": -93.6279369839979, "date": "2025-09-04_18-29-50", "episode_reward_max": 8.000000667069283, "pid": 3651948, "timestamp": 1757003390, "episode_reward_mean": -65.20491987533221, "time_total_s": 8219.247455835342, "episodes_total": 5534, "episode_len_mean": 39.3}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 8252.84744977951, "info": {"sample_time_ms": 33385.07, "num_steps_trained": 254400, "grad_time_ms": 364.229, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 497.3307800292969, "policy_loss": -0.16077612340450287, "vf_explained_var": 0.035886500030756, "entropy": 12.381339073181152, "cur_lr": 4.999999873689376e-05, "total_loss": 497.1937255859375, "kl": 0.015604168176651001}, "load_time_ms": 0.679, "num_steps_sampled": 254400, "update_time_ms": 2.486}, "training_iteration": 212, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.59999394416809, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 254400, "timesteps_total": 254400, "custom_metrics": {}, "iterations_since_restore": 212, "episodes_this_iter": 30, "episode_reward_min": -93.6279369839979, "date": "2025-09-04_18-30-23", "episode_reward_max": 8.000000667069283, "pid": 3651948, "timestamp": 1757003423, "episode_reward_mean": -62.45875354009866, "time_total_s": 8252.84744977951, "episodes_total": 5564, "episode_len_mean": 38.05}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 8287.260428905487, "info": {"sample_time_ms": 33490.409, "num_steps_trained": 255600, "grad_time_ms": 363.844, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 512.883056640625, "policy_loss": -0.17092293500900269, "vf_explained_var": 0.027442097663879395, "entropy": 12.155288696289062, "cur_lr": 4.999999873689376e-05, "total_loss": 512.7354736328125, "kl": 0.015318612568080425}, "load_time_ms": 0.677, "num_steps_sampled": 255600, "update_time_ms": 2.464}, "training_iteration": 213, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.41297912597656, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 255600, "timesteps_total": 255600, "custom_metrics": {}, "iterations_since_restore": 213, "episodes_this_iter": 31, "episode_reward_min": -93.6279369839979, "date": "2025-09-04_18-30-58", "episode_reward_max": 8.000000667069283, "pid": 3651948, "timestamp": 1757003458, "episode_reward_mean": -61.96758689936211, "time_total_s": 8287.260428905487, "episodes_total": 5595, "episode_len_mean": 37.75}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 8320.60078382492, "info": {"sample_time_ms": 33482.986, "num_steps_trained": 256800, "grad_time_ms": 363.767, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 459.37249755859375, "policy_loss": -0.16169892251491547, "vf_explained_var": 0.02173599973320961, "entropy": 12.374711036682129, "cur_lr": 4.999999873689376e-05, "total_loss": 459.2326354980469, "kl": 0.014371867291629314}, "load_time_ms": 0.677, "num_steps_sampled": 256800, "update_time_ms": 2.455}, "training_iteration": 214, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.340354919433594, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 256800, "timesteps_total": 256800, "custom_metrics": {}, "iterations_since_restore": 214, "episodes_this_iter": 33, "episode_reward_min": -93.6279369839979, "date": "2025-09-04_18-31-31", "episode_reward_max": 6.000034402189836, "pid": 3651948, "timestamp": 1757003491, "episode_reward_mean": -63.38828660104511, "time_total_s": 8320.60078382492, "episodes_total": 5628, "episode_len_mean": 38.32}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 8354.387178182602, "info": {"sample_time_ms": 33488.327, "num_steps_trained": 258000, "grad_time_ms": 365.69, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 504.76953125, "policy_loss": -0.1607033908367157, "vf_explained_var": 0.014909658581018448, "entropy": 12.613929748535156, "cur_lr": 4.999999873689376e-05, "total_loss": 504.6307373046875, "kl": 0.014423470944166183}, "load_time_ms": 0.691, "num_steps_sampled": 258000, "update_time_ms": 2.411}, "training_iteration": 215, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.786394357681274, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 258000, "timesteps_total": 258000, "custom_metrics": {}, "iterations_since_restore": 215, "episodes_this_iter": 32, "episode_reward_min": -93.63772402806477, "date": "2025-09-04_18-32-05", "episode_reward_max": 6.000006885068439, "pid": 3651948, "timestamp": 1757003525, "episode_reward_mean": -62.11349040983944, "time_total_s": 8354.387178182602, "episodes_total": 5660, "episode_len_mean": 37.78}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 8388.01570558548, "info": {"sample_time_ms": 33508.098, "num_steps_trained": 259200, "grad_time_ms": 368.138, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 484.1788330078125, "policy_loss": -0.16928161680698395, "vf_explained_var": 0.028671972453594208, "entropy": 12.11899471282959, "cur_lr": 4.999999873689376e-05, "total_loss": 484.0320129394531, "kl": 0.01483107265084982}, "load_time_ms": 0.699, "num_steps_sampled": 259200, "update_time_ms": 2.44}, "training_iteration": 216, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.62852740287781, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 259200, "timesteps_total": 259200, "custom_metrics": {}, "iterations_since_restore": 216, "episodes_this_iter": 30, "episode_reward_min": -94.87738322979997, "date": "2025-09-04_18-32-39", "episode_reward_max": 6.000033280248075, "pid": 3651948, "timestamp": 1757003559, "episode_reward_mean": -61.03114630598341, "time_total_s": 8388.01570558548, "episodes_total": 5690, "episode_len_mean": 37.16}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 8421.57034111023, "info": {"sample_time_ms": 33450.914, "num_steps_trained": 260400, "grad_time_ms": 368.741, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 474.89111328125, "policy_loss": -0.16506989300251007, "vf_explained_var": 0.017899474129080772, "entropy": 12.297295570373535, "cur_lr": 4.999999873689376e-05, "total_loss": 474.7478942871094, "kl": 0.014392748475074768}, "load_time_ms": 0.683, "num_steps_sampled": 260400, "update_time_ms": 2.465}, "training_iteration": 217, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.554635524749756, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 260400, "timesteps_total": 260400, "custom_metrics": {}, "iterations_since_restore": 217, "episodes_this_iter": 36, "episode_reward_min": -94.87738322979997, "date": "2025-09-04_18-33-12", "episode_reward_max": 6.000033280248075, "pid": 3651948, "timestamp": 1757003592, "episode_reward_mean": -58.55471967135243, "time_total_s": 8421.57034111023, "episodes_total": 5726, "episode_len_mean": 36.21}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 8455.177167654037, "info": {"sample_time_ms": 33444.751, "num_steps_trained": 261600, "grad_time_ms": 369.745, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 457.5904846191406, "policy_loss": -0.15295682847499847, "vf_explained_var": 0.02529967576265335, "entropy": 12.191746711730957, "cur_lr": 4.999999873689376e-05, "total_loss": 457.4587097167969, "kl": 0.013913111761212349}, "load_time_ms": 0.69, "num_steps_sampled": 261600, "update_time_ms": 2.452}, "training_iteration": 218, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.60682654380798, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 261600, "timesteps_total": 261600, "custom_metrics": {}, "iterations_since_restore": 218, "episodes_this_iter": 30, "episode_reward_min": -94.87738322979997, "date": "2025-09-04_18-33-46", "episode_reward_max": 6.000033280248075, "pid": 3651948, "timestamp": 1757003626, "episode_reward_mean": -59.80524415553662, "time_total_s": 8455.177167654037, "episodes_total": 5756, "episode_len_mean": 36.98}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 8488.736039161682, "info": {"sample_time_ms": 33312.104, "num_steps_trained": 262800, "grad_time_ms": 369.461, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 502.29730224609375, "policy_loss": -0.17624081671237946, "vf_explained_var": 0.02473224513232708, "entropy": 12.325740814208984, "cur_lr": 4.999999873689376e-05, "total_loss": 502.14306640625, "kl": 0.01449984684586525}, "load_time_ms": 0.687, "num_steps_sampled": 262800, "update_time_ms": 2.463}, "training_iteration": 219, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.55887150764465, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 262800, "timesteps_total": 262800, "custom_metrics": {}, "iterations_since_restore": 219, "episodes_this_iter": 32, "episode_reward_min": -92.68477240724081, "date": "2025-09-04_18-34-19", "episode_reward_max": 6.000001307149937, "pid": 3651948, "timestamp": 1757003659, "episode_reward_mean": -58.989134541468914, "time_total_s": 8488.736039161682, "episodes_total": 5788, "episode_len_mean": 36.65}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 8522.322809696198, "info": {"sample_time_ms": 33263.505, "num_steps_trained": 264000, "grad_time_ms": 371.579, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 494.7518005371094, "policy_loss": -0.17339009046554565, "vf_explained_var": 0.019748859107494354, "entropy": 11.994256973266602, "cur_lr": 4.999999873689376e-05, "total_loss": 494.6004638671875, "kl": 0.014561583288013935}, "load_time_ms": 0.681, "num_steps_sampled": 264000, "update_time_ms": 2.506}, "training_iteration": 220, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.58677053451538, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 264000, "timesteps_total": 264000, "custom_metrics": {}, "iterations_since_restore": 220, "episodes_this_iter": 33, "episode_reward_min": -92.68477240724081, "date": "2025-09-04_18-34-53", "episode_reward_max": 6.000001307149937, "pid": 3651948, "timestamp": 1757003693, "episode_reward_mean": -61.583950956835054, "time_total_s": 8522.322809696198, "episodes_total": 5821, "episode_len_mean": 37.68}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 8556.517718076706, "info": {"sample_time_ms": 33347.762, "num_steps_trained": 265200, "grad_time_ms": 371.472, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 480.7444763183594, "policy_loss": -0.17422175407409668, "vf_explained_var": 0.03855053707957268, "entropy": 12.05868911743164, "cur_lr": 4.999999873689376e-05, "total_loss": 480.5936279296875, "kl": 0.015407336875796318}, "load_time_ms": 0.685, "num_steps_sampled": 265200, "update_time_ms": 2.58}, "training_iteration": 221, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.19490838050842, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 265200, "timesteps_total": 265200, "custom_metrics": {}, "iterations_since_restore": 221, "episodes_this_iter": 34, "episode_reward_min": -92.68477240724081, "date": "2025-09-04_18-35-27", "episode_reward_max": 6.000001307149937, "pid": 3651948, "timestamp": 1757003727, "episode_reward_mean": -58.61488066202075, "time_total_s": 8556.517718076706, "episodes_total": 5855, "episode_len_mean": 36.22}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 8590.381103038788, "info": {"sample_time_ms": 33373.788, "num_steps_trained": 266400, "grad_time_ms": 371.794, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 465.3026123046875, "policy_loss": -0.16076049208641052, "vf_explained_var": 0.013659258373081684, "entropy": 12.263897895812988, "cur_lr": 4.999999873689376e-05, "total_loss": 465.1667175292969, "kl": 0.016368364915251732}, "load_time_ms": 0.695, "num_steps_sampled": 266400, "update_time_ms": 2.548}, "training_iteration": 222, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.86338496208191, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 266400, "timesteps_total": 266400, "custom_metrics": {}, "iterations_since_restore": 222, "episodes_this_iter": 34, "episode_reward_min": -92.65719252126992, "date": "2025-09-04_18-36-01", "episode_reward_max": 8.000000506173045, "pid": 3651948, "timestamp": 1757003761, "episode_reward_mean": -57.24653802731454, "time_total_s": 8590.381103038788, "episodes_total": 5889, "episode_len_mean": 35.82}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 8624.816487312317, "info": {"sample_time_ms": 33374.658, "num_steps_trained": 267600, "grad_time_ms": 373.171, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 450.2386474609375, "policy_loss": -0.17680014669895172, "vf_explained_var": 0.023019777610898018, "entropy": 11.894817352294922, "cur_lr": 4.999999873689376e-05, "total_loss": 450.0855407714844, "kl": 0.015569154173135757}, "load_time_ms": 0.705, "num_steps_sampled": 267600, "update_time_ms": 2.51}, "training_iteration": 223, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.43538427352905, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 267600, "timesteps_total": 267600, "custom_metrics": {}, "iterations_since_restore": 223, "episodes_this_iter": 34, "episode_reward_min": -92.8331001949819, "date": "2025-09-04_18-36-36", "episode_reward_max": 8.000000506173045, "pid": 3651948, "timestamp": 1757003796, "episode_reward_mean": -56.41039121202906, "time_total_s": 8624.816487312317, "episodes_total": 5923, "episode_len_mean": 35.46}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 8657.887679338455, "info": {"sample_time_ms": 33349.505, "num_steps_trained": 268800, "grad_time_ms": 371.394, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 537.3810424804688, "policy_loss": -0.16604220867156982, "vf_explained_var": 0.01993529684841633, "entropy": 11.660624504089355, "cur_lr": 4.999999873689376e-05, "total_loss": 537.23876953125, "kl": 0.015647679567337036}, "load_time_ms": 0.701, "num_steps_sampled": 268800, "update_time_ms": 2.556}, "training_iteration": 224, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.071192026138306, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 268800, "timesteps_total": 268800, "custom_metrics": {}, "iterations_since_restore": 224, "episodes_this_iter": 29, "episode_reward_min": -93.81865513420475, "date": "2025-09-04_18-37-09", "episode_reward_max": 8.000000506173045, "pid": 3651948, "timestamp": 1757003829, "episode_reward_mean": -59.83576275009328, "time_total_s": 8657.887679338455, "episodes_total": 5952, "episode_len_mean": 36.98}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 8691.416977643967, "info": {"sample_time_ms": 33326.098, "num_steps_trained": 270000, "grad_time_ms": 369.077, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 508.33740234375, "policy_loss": -0.16329897940158844, "vf_explained_var": 0.02697630040347576, "entropy": 12.403926849365234, "cur_lr": 4.999999873689376e-05, "total_loss": 508.1960754394531, "kl": 0.014454166404902935}, "load_time_ms": 0.695, "num_steps_sampled": 270000, "update_time_ms": 2.584}, "training_iteration": 225, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.529298305511475, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 270000, "timesteps_total": 270000, "custom_metrics": {}, "iterations_since_restore": 225, "episodes_this_iter": 31, "episode_reward_min": -93.81865513420475, "date": "2025-09-04_18-37-42", "episode_reward_max": 8.000000406666924, "pid": 3651948, "timestamp": 1757003862, "episode_reward_mean": -62.80773614513645, "time_total_s": 8691.416977643967, "episodes_total": 5983, "episode_len_mean": 38.18}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 8725.05351448059, "info": {"sample_time_ms": 33329.061, "num_steps_trained": 271200, "grad_time_ms": 366.944, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 487.5612487792969, "policy_loss": -0.16774672269821167, "vf_explained_var": 0.01808946020901203, "entropy": 12.464456558227539, "cur_lr": 4.999999873689376e-05, "total_loss": 487.41497802734375, "kl": 0.014172756113111973}, "load_time_ms": 0.682, "num_steps_sampled": 271200, "update_time_ms": 2.571}, "training_iteration": 226, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.636536836624146, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 271200, "timesteps_total": 271200, "custom_metrics": {}, "iterations_since_restore": 226, "episodes_this_iter": 30, "episode_reward_min": -93.81865513420475, "date": "2025-09-04_18-38-16", "episode_reward_max": 8.000000433543274, "pid": 3651948, "timestamp": 1757003896, "episode_reward_mean": -64.92589373827938, "time_total_s": 8725.05351448059, "episodes_total": 6013, "episode_len_mean": 39.31}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 8758.123383283615, "info": {"sample_time_ms": 33281.457, "num_steps_trained": 272400, "grad_time_ms": 366.033, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 469.1771240234375, "policy_loss": -0.17603828012943268, "vf_explained_var": 0.030873127281665802, "entropy": 11.629398345947266, "cur_lr": 4.999999873689376e-05, "total_loss": 469.0252380371094, "kl": 0.015914278104901314}, "load_time_ms": 0.684, "num_steps_sampled": 272400, "update_time_ms": 2.578}, "training_iteration": 227, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.06986880302429, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 272400, "timesteps_total": 272400, "custom_metrics": {}, "iterations_since_restore": 227, "episodes_this_iter": 32, "episode_reward_min": -92.0961907308189, "date": "2025-09-04_18-38-49", "episode_reward_max": 8.000000433543274, "pid": 3651948, "timestamp": 1757003929, "episode_reward_mean": -65.39002211193822, "time_total_s": 8758.123383283615, "episodes_total": 6045, "episode_len_mean": 39.64}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 8791.669610738754, "info": {"sample_time_ms": 33276.525, "num_steps_trained": 273600, "grad_time_ms": 364.904, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 480.46771240234375, "policy_loss": -0.15781445801258087, "vf_explained_var": 0.02216174267232418, "entropy": 11.83214282989502, "cur_lr": 4.999999873689376e-05, "total_loss": 480.3331298828125, "kl": 0.015275244601070881}, "load_time_ms": 0.674, "num_steps_sampled": 273600, "update_time_ms": 2.595}, "training_iteration": 228, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.54622745513916, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 273600, "timesteps_total": 273600, "custom_metrics": {}, "iterations_since_restore": 228, "episodes_this_iter": 33, "episode_reward_min": -92.50693433778561, "date": "2025-09-04_18-39-22", "episode_reward_max": 8.000000433543274, "pid": 3651948, "timestamp": 1757003962, "episode_reward_mean": -62.27324965840894, "time_total_s": 8791.669610738754, "episodes_total": 6078, "episode_len_mean": 37.92}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 8825.53575849533, "info": {"sample_time_ms": 33305.359, "num_steps_trained": 274800, "grad_time_ms": 366.757, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 526.8289794921875, "policy_loss": -0.16418081521987915, "vf_explained_var": 0.01841430552303791, "entropy": 11.820015907287598, "cur_lr": 4.999999873689376e-05, "total_loss": 526.6889038085938, "kl": 0.015894444659352303}, "load_time_ms": 0.681, "num_steps_sampled": 274800, "update_time_ms": 2.626}, "training_iteration": 229, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.86614775657654, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 274800, "timesteps_total": 274800, "custom_metrics": {}, "iterations_since_restore": 229, "episodes_this_iter": 33, "episode_reward_min": -93.84268985082524, "date": "2025-09-04_18-39-56", "episode_reward_max": 6.0005564529203665, "pid": 3651948, "timestamp": 1757003996, "episode_reward_mean": -61.21300704792242, "time_total_s": 8825.53575849533, "episodes_total": 6111, "episode_len_mean": 37.23}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 8859.073387145996, "info": {"sample_time_ms": 33299.837, "num_steps_trained": 276000, "grad_time_ms": 367.377, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 518.92041015625, "policy_loss": -0.16965606808662415, "vf_explained_var": 0.027118226513266563, "entropy": 11.502217292785645, "cur_lr": 4.999999873689376e-05, "total_loss": 518.77490234375, "kl": 0.01592307724058628}, "load_time_ms": 0.692, "num_steps_sampled": 276000, "update_time_ms": 2.589}, "training_iteration": 230, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.53762865066528, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 276000, "timesteps_total": 276000, "custom_metrics": {}, "iterations_since_restore": 230, "episodes_this_iter": 32, "episode_reward_min": -93.84268985082524, "date": "2025-09-04_18-40-30", "episode_reward_max": 6.0005564529203665, "pid": 3651948, "timestamp": 1757004030, "episode_reward_mean": -59.24914097370995, "time_total_s": 8859.073387145996, "episodes_total": 6143, "episode_len_mean": 36.38}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 8892.482960700989, "info": {"sample_time_ms": 33219.391, "num_steps_trained": 277200, "grad_time_ms": 369.319, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 493.50482177734375, "policy_loss": -0.17525163292884827, "vf_explained_var": 0.026482833549380302, "entropy": 11.950725555419922, "cur_lr": 4.999999873689376e-05, "total_loss": 493.3524169921875, "kl": 0.01507889200001955}, "load_time_ms": 0.695, "num_steps_sampled": 277200, "update_time_ms": 2.542}, "training_iteration": 231, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.409573554992676, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 277200, "timesteps_total": 277200, "custom_metrics": {}, "iterations_since_restore": 231, "episodes_this_iter": 35, "episode_reward_min": -93.93805342725528, "date": "2025-09-04_18-41-03", "episode_reward_max": 6.000051157608556, "pid": 3651948, "timestamp": 1757004063, "episode_reward_mean": -58.93394690622315, "time_total_s": 8892.482960700989, "episodes_total": 6178, "episode_len_mean": 36.31}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 8926.56376671791, "info": {"sample_time_ms": 33239.182, "num_steps_trained": 278400, "grad_time_ms": 371.247, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 477.0397033691406, "policy_loss": -0.1522827297449112, "vf_explained_var": 0.024695463478565216, "entropy": 12.114645004272461, "cur_lr": 4.999999873689376e-05, "total_loss": 476.9099426269531, "kl": 0.014842814765870571}, "load_time_ms": 0.7, "num_steps_sampled": 278400, "update_time_ms": 2.557}, "training_iteration": 232, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.080806016922, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 278400, "timesteps_total": 278400, "custom_metrics": {}, "iterations_since_restore": 232, "episodes_this_iter": 35, "episode_reward_min": -93.93805342725528, "date": "2025-09-04_18-41-37", "episode_reward_max": 6.000051157608556, "pid": 3651948, "timestamp": 1757004097, "episode_reward_mean": -57.25040114733103, "time_total_s": 8926.56376671791, "episodes_total": 6213, "episode_len_mean": 35.59}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 8960.791088581085, "info": {"sample_time_ms": 33220.342, "num_steps_trained": 279600, "grad_time_ms": 369.251, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 463.5008544921875, "policy_loss": -0.18232877552509308, "vf_explained_var": 0.022623876109719276, "entropy": 11.372271537780762, "cur_lr": 4.999999873689376e-05, "total_loss": 463.34088134765625, "kl": 0.014735642820596695}, "load_time_ms": 0.685, "num_steps_sampled": 279600, "update_time_ms": 2.589}, "training_iteration": 233, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.22732186317444, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 279600, "timesteps_total": 279600, "custom_metrics": {}, "iterations_since_restore": 233, "episodes_this_iter": 33, "episode_reward_min": -93.93805342725528, "date": "2025-09-04_18-42-12", "episode_reward_max": 6.000025070402176, "pid": 3651948, "timestamp": 1757004132, "episode_reward_mean": -55.39362406723989, "time_total_s": 8960.791088581085, "episodes_total": 6246, "episode_len_mean": 34.64}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 8994.085668563843, "info": {"sample_time_ms": 33242.289, "num_steps_trained": 280800, "grad_time_ms": 369.641, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 465.8112487792969, "policy_loss": -0.15602374076843262, "vf_explained_var": 0.043435726314783096, "entropy": 12.00288200378418, "cur_lr": 4.999999873689376e-05, "total_loss": 465.6771240234375, "kl": 0.014424502849578857}, "load_time_ms": 0.689, "num_steps_sampled": 280800, "update_time_ms": 2.565}, "training_iteration": 234, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.29457998275757, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 280800, "timesteps_total": 280800, "custom_metrics": {}, "iterations_since_restore": 234, "episodes_this_iter": 32, "episode_reward_min": -93.07855688625773, "date": "2025-09-04_18-42-45", "episode_reward_max": 6.000025070402176, "pid": 3651948, "timestamp": 1757004165, "episode_reward_mean": -56.94350179907652, "time_total_s": 8994.085668563843, "episodes_total": 6278, "episode_len_mean": 35.68}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 9027.672051429749, "info": {"sample_time_ms": 33245.241, "num_steps_trained": 282000, "grad_time_ms": 372.427, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 537.2978515625, "policy_loss": -0.16486559808254242, "vf_explained_var": 0.02764366753399372, "entropy": 11.935712814331055, "cur_lr": 4.999999873689376e-05, "total_loss": 537.1552124023438, "kl": 0.01467643678188324}, "load_time_ms": 0.694, "num_steps_sampled": 282000, "update_time_ms": 2.541}, "training_iteration": 235, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.58638286590576, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 282000, "timesteps_total": 282000, "custom_metrics": {}, "iterations_since_restore": 235, "episodes_this_iter": 30, "episode_reward_min": -93.07855688625773, "date": "2025-09-04_18-43-19", "episode_reward_max": 6.000025070402176, "pid": 3651948, "timestamp": 1757004199, "episode_reward_mean": -58.26857565715705, "time_total_s": 9027.672051429749, "episodes_total": 6308, "episode_len_mean": 36.4}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 9061.819860935211, "info": {"sample_time_ms": 33297.025, "num_steps_trained": 283200, "grad_time_ms": 371.802, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 552.9977416992188, "policy_loss": -0.15926803648471832, "vf_explained_var": 0.023812182247638702, "entropy": 11.88892650604248, "cur_lr": 4.999999873689376e-05, "total_loss": 552.8607177734375, "kl": 0.014615191146731377}, "load_time_ms": 0.695, "num_steps_sampled": 283200, "update_time_ms": 2.536}, "training_iteration": 236, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.14780950546265, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 283200, "timesteps_total": 283200, "custom_metrics": {}, "iterations_since_restore": 236, "episodes_this_iter": 33, "episode_reward_min": -92.59710856730977, "date": "2025-09-04_18-43-53", "episode_reward_max": 8.000068323775915, "pid": 3651948, "timestamp": 1757004233, "episode_reward_mean": -61.44614542643216, "time_total_s": 9061.819860935211, "episodes_total": 6341, "episode_len_mean": 37.82}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 9095.312840461731, "info": {"sample_time_ms": 33338.48, "num_steps_trained": 284400, "grad_time_ms": 372.653, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 462.8885192871094, "policy_loss": -0.18361049890518188, "vf_explained_var": 0.02978028915822506, "entropy": 11.811455726623535, "cur_lr": 4.999999873689376e-05, "total_loss": 462.72698974609375, "kl": 0.014550920575857162}, "load_time_ms": 0.698, "num_steps_sampled": 284400, "update_time_ms": 2.543}, "training_iteration": 237, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.492979526519775, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 284400, "timesteps_total": 284400, "custom_metrics": {}, "iterations_since_restore": 237, "episodes_this_iter": 31, "episode_reward_min": -95.66316184995254, "date": "2025-09-04_18-44-26", "episode_reward_max": 8.000068323775915, "pid": 3651948, "timestamp": 1757004266, "episode_reward_mean": -63.84925295945575, "time_total_s": 9095.312840461731, "episodes_total": 6372, "episode_len_mean": 38.73}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 9128.5523583889, "info": {"sample_time_ms": 33307.224, "num_steps_trained": 285600, "grad_time_ms": 373.24, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 482.65777587890625, "policy_loss": -0.16565656661987305, "vf_explained_var": 0.0365450456738472, "entropy": 12.015816688537598, "cur_lr": 4.999999873689376e-05, "total_loss": 482.5158386230469, "kl": 0.015651242807507515}, "load_time_ms": 0.704, "num_steps_sampled": 285600, "update_time_ms": 2.529}, "training_iteration": 238, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.2395179271698, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 285600, "timesteps_total": 285600, "custom_metrics": {}, "iterations_since_restore": 238, "episodes_this_iter": 34, "episode_reward_min": -95.66316184995254, "date": "2025-09-04_18-45-00", "episode_reward_max": 8.000068323775915, "pid": 3651948, "timestamp": 1757004300, "episode_reward_mean": -62.42745717113413, "time_total_s": 9128.5523583889, "episodes_total": 6406, "episode_len_mean": 38.06}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 9163.238487005234, "info": {"sample_time_ms": 33387.815, "num_steps_trained": 286800, "grad_time_ms": 374.688, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 439.43817138671875, "policy_loss": -0.17008011043071747, "vf_explained_var": 0.025046832859516144, "entropy": 12.047761917114258, "cur_lr": 4.999999873689376e-05, "total_loss": 439.2913513183594, "kl": 0.01528315432369709}, "load_time_ms": 0.698, "num_steps_sampled": 286800, "update_time_ms": 2.507}, "training_iteration": 239, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.68612861633301, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 286800, "timesteps_total": 286800, "custom_metrics": {}, "iterations_since_restore": 239, "episodes_this_iter": 30, "episode_reward_min": -95.66316184995254, "date": "2025-09-04_18-45-34", "episode_reward_max": 6.00007350824956, "pid": 3651948, "timestamp": 1757004334, "episode_reward_mean": -63.69194950752015, "time_total_s": 9163.238487005234, "episodes_total": 6436, "episode_len_mean": 38.83}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 9197.054631233215, "info": {"sample_time_ms": 33417.206, "num_steps_trained": 288000, "grad_time_ms": 373.11, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 479.13433837890625, "policy_loss": -0.16308458149433136, "vf_explained_var": 0.041000742465257645, "entropy": 12.105916976928711, "cur_lr": 4.999999873689376e-05, "total_loss": 478.9933776855469, "kl": 0.01457090862095356}, "load_time_ms": 0.705, "num_steps_sampled": 288000, "update_time_ms": 2.543}, "training_iteration": 240, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.81614422798157, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 288000, "timesteps_total": 288000, "custom_metrics": {}, "iterations_since_restore": 240, "episodes_this_iter": 32, "episode_reward_min": -91.67071305108782, "date": "2025-09-04_18-46-08", "episode_reward_max": 6.00007350824956, "pid": 3651948, "timestamp": 1757004368, "episode_reward_mean": -58.72263234325682, "time_total_s": 9197.054631233215, "episodes_total": 6468, "episode_len_mean": 36.91}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 9230.33184838295, "info": {"sample_time_ms": 33405.928, "num_steps_trained": 289200, "grad_time_ms": 371.198, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 472.53179931640625, "policy_loss": -0.16461673378944397, "vf_explained_var": 0.016277603805065155, "entropy": 12.22976303100586, "cur_lr": 4.999999873689376e-05, "total_loss": 472.39068603515625, "kl": 0.015477120876312256}, "load_time_ms": 0.703, "num_steps_sampled": 289200, "update_time_ms": 2.505}, "training_iteration": 241, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.2772171497345, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 289200, "timesteps_total": 289200, "custom_metrics": {}, "iterations_since_restore": 241, "episodes_this_iter": 35, "episode_reward_min": -92.50884099974769, "date": "2025-09-04_18-46-41", "episode_reward_max": 6.000044439385878, "pid": 3651948, "timestamp": 1757004401, "episode_reward_mean": -58.68430621255006, "time_total_s": 9230.33184838295, "episodes_total": 6503, "episode_len_mean": 36.9}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 9264.127333402634, "info": {"sample_time_ms": 33377.388, "num_steps_trained": 290400, "grad_time_ms": 371.17, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 457.06732177734375, "policy_loss": -0.17669327557086945, "vf_explained_var": 0.02635866403579712, "entropy": 11.52662181854248, "cur_lr": 4.999999873689376e-05, "total_loss": 456.9132080078125, "kl": 0.01483425684273243}, "load_time_ms": 0.713, "num_steps_sampled": 290400, "update_time_ms": 2.495}, "training_iteration": 242, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.79548501968384, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 290400, "timesteps_total": 290400, "custom_metrics": {}, "iterations_since_restore": 242, "episodes_this_iter": 33, "episode_reward_min": -94.35696984520187, "date": "2025-09-04_18-47-15", "episode_reward_max": 8.000000402653834, "pid": 3651948, "timestamp": 1757004435, "episode_reward_mean": -58.24223301701984, "time_total_s": 9264.127333402634, "episodes_total": 6536, "episode_len_mean": 36.42}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 9298.336977005005, "info": {"sample_time_ms": 33374.345, "num_steps_trained": 291600, "grad_time_ms": 372.454, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 497.88671875, "policy_loss": -0.16106237471103668, "vf_explained_var": 0.03792598471045494, "entropy": 11.966264724731445, "cur_lr": 4.999999873689376e-05, "total_loss": 497.7486877441406, "kl": 0.015170086175203323}, "load_time_ms": 0.711, "num_steps_sampled": 291600, "update_time_ms": 2.501}, "training_iteration": 243, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.209643602371216, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 291600, "timesteps_total": 291600, "custom_metrics": {}, "iterations_since_restore": 243, "episodes_this_iter": 30, "episode_reward_min": -94.35696984520187, "date": "2025-09-04_18-47-49", "episode_reward_max": 8.000000402653834, "pid": 3651948, "timestamp": 1757004469, "episode_reward_mean": -57.96694031513576, "time_total_s": 9298.336977005005, "episodes_total": 6566, "episode_len_mean": 36.01}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 9331.860694169998, "info": {"sample_time_ms": 33395.731, "num_steps_trained": 292800, "grad_time_ms": 373.965, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 564.9078979492188, "policy_loss": -0.16683726012706757, "vf_explained_var": 0.02323988452553749, "entropy": 11.774674415588379, "cur_lr": 4.999999873689376e-05, "total_loss": 564.7649536132812, "kl": 0.0157768651843071}, "load_time_ms": 0.709, "num_steps_sampled": 292800, "update_time_ms": 2.473}, "training_iteration": 244, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.523717164993286, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 292800, "timesteps_total": 292800, "custom_metrics": {}, "iterations_since_restore": 244, "episodes_this_iter": 35, "episode_reward_min": -94.35696984520187, "date": "2025-09-04_18-48-23", "episode_reward_max": 8.000000402653834, "pid": 3651948, "timestamp": 1757004503, "episode_reward_mean": -58.10606370958501, "time_total_s": 9331.860694169998, "episodes_total": 6601, "episode_len_mean": 36.2}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 9365.360492706299, "info": {"sample_time_ms": 33388.358, "num_steps_trained": 294000, "grad_time_ms": 372.607, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 525.4404907226562, "policy_loss": -0.17092472314834595, "vf_explained_var": 0.030833972617983818, "entropy": 12.102313041687012, "cur_lr": 4.999999873689376e-05, "total_loss": 525.2924194335938, "kl": 0.015038705430924892}, "load_time_ms": 0.711, "num_steps_sampled": 294000, "update_time_ms": 2.535}, "training_iteration": 245, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.49979853630066, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 294000, "timesteps_total": 294000, "custom_metrics": {}, "iterations_since_restore": 245, "episodes_this_iter": 39, "episode_reward_min": -92.67283625827994, "date": "2025-09-04_18-48-57", "episode_reward_max": 8.000000401008807, "pid": 3651948, "timestamp": 1757004537, "episode_reward_mean": -53.75262255852712, "time_total_s": 9365.360492706299, "episodes_total": 6640, "episode_len_mean": 34.39}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 9399.292599201202, "info": {"sample_time_ms": 33366.768, "num_steps_trained": 295200, "grad_time_ms": 372.608, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 503.8609619140625, "policy_loss": -0.1675948053598404, "vf_explained_var": 0.014279961585998535, "entropy": 11.799020767211914, "cur_lr": 4.999999873689376e-05, "total_loss": 503.7156677246094, "kl": 0.014678357169032097}, "load_time_ms": 0.72, "num_steps_sampled": 295200, "update_time_ms": 2.56}, "training_iteration": 246, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.932106494903564, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 295200, "timesteps_total": 295200, "custom_metrics": {}, "iterations_since_restore": 246, "episodes_this_iter": 34, "episode_reward_min": -91.12291953131009, "date": "2025-09-04_18-49-30", "episode_reward_max": 8.000000401008807, "pid": 3651948, "timestamp": 1757004570, "episode_reward_mean": -52.047525918583716, "time_total_s": 9399.292599201202, "episodes_total": 6674, "episode_len_mean": 33.62}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 9433.063627958298, "info": {"sample_time_ms": 33394.902, "num_steps_trained": 296400, "grad_time_ms": 372.305, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 482.9005126953125, "policy_loss": -0.17791959643363953, "vf_explained_var": 0.02709423191845417, "entropy": 12.1153564453125, "cur_lr": 4.999999873689376e-05, "total_loss": 482.7449035644531, "kl": 0.014708485454320908}, "load_time_ms": 0.727, "num_steps_sampled": 296400, "update_time_ms": 2.534}, "training_iteration": 247, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.77102875709534, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 296400, "timesteps_total": 296400, "custom_metrics": {}, "iterations_since_restore": 247, "episodes_this_iter": 34, "episode_reward_min": -91.4407548251745, "date": "2025-09-04_18-50-04", "episode_reward_max": 8.000000400008329, "pid": 3651948, "timestamp": 1757004604, "episode_reward_mean": -51.35171391690495, "time_total_s": 9433.063627958298, "episodes_total": 6708, "episode_len_mean": 33.29}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 9466.194394826889, "info": {"sample_time_ms": 33384.891, "num_steps_trained": 297600, "grad_time_ms": 371.499, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 510.83685302734375, "policy_loss": -0.17384184896945953, "vf_explained_var": 0.014701505191624165, "entropy": 11.439382553100586, "cur_lr": 4.999999873689376e-05, "total_loss": 510.68658447265625, "kl": 0.015532774850726128}, "load_time_ms": 0.718, "num_steps_sampled": 297600, "update_time_ms": 2.512}, "training_iteration": 248, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.13076686859131, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 297600, "timesteps_total": 297600, "custom_metrics": {}, "iterations_since_restore": 248, "episodes_this_iter": 32, "episode_reward_min": -91.44445958853663, "date": "2025-09-04_18-50-37", "episode_reward_max": 8.000000625473389, "pid": 3651948, "timestamp": 1757004637, "episode_reward_mean": -55.1829405642252, "time_total_s": 9466.194394826889, "episodes_total": 6740, "episode_len_mean": 34.99}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 9499.82013463974, "info": {"sample_time_ms": 33282.448, "num_steps_trained": 298800, "grad_time_ms": 367.948, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 498.81988525390625, "policy_loss": -0.15983320772647858, "vf_explained_var": 0.018894175067543983, "entropy": 11.790884971618652, "cur_lr": 4.999999873689376e-05, "total_loss": 498.68408203125, "kl": 0.015838027000427246}, "load_time_ms": 0.72, "num_steps_sampled": 298800, "update_time_ms": 2.49}, "training_iteration": 249, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.62573981285095, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 298800, "timesteps_total": 298800, "custom_metrics": {}, "iterations_since_restore": 249, "episodes_this_iter": 34, "episode_reward_min": -93.8386863279901, "date": "2025-09-04_18-51-11", "episode_reward_max": 8.000000625473389, "pid": 3651948, "timestamp": 1757004671, "episode_reward_mean": -57.87083837237296, "time_total_s": 9499.82013463974, "episodes_total": 6774, "episode_len_mean": 36.09}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 9533.402312994003, "info": {"sample_time_ms": 33261.054, "num_steps_trained": 300000, "grad_time_ms": 366.013, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 493.55804443359375, "policy_loss": -0.16028568148612976, "vf_explained_var": 0.02670077420771122, "entropy": 11.616454124450684, "cur_lr": 4.999999873689376e-05, "total_loss": 493.42083740234375, "kl": 0.015181425958871841}, "load_time_ms": 0.696, "num_steps_sampled": 300000, "update_time_ms": 2.449}, "training_iteration": 250, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.582178354263306, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 300000, "timesteps_total": 300000, "custom_metrics": {}, "iterations_since_restore": 250, "episodes_this_iter": 36, "episode_reward_min": -93.8386863279901, "date": "2025-09-04_18-51-45", "episode_reward_max": 8.000000625473389, "pid": 3651948, "timestamp": 1757004705, "episode_reward_mean": -57.23362154027995, "time_total_s": 9533.402312994003, "episodes_total": 6810, "episode_len_mean": 35.7}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 9566.642753839493, "info": {"sample_time_ms": 33255.008, "num_steps_trained": 301200, "grad_time_ms": 368.254, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 444.5202331542969, "policy_loss": -0.16525143384933472, "vf_explained_var": 0.032825905829668045, "entropy": 11.39554214477539, "cur_lr": 4.999999873689376e-05, "total_loss": 444.3780822753906, "kl": 0.015219918452203274}, "load_time_ms": 0.699, "num_steps_sampled": 301200, "update_time_ms": 2.562}, "training_iteration": 251, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.2404408454895, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 301200, "timesteps_total": 301200, "custom_metrics": {}, "iterations_since_restore": 251, "episodes_this_iter": 33, "episode_reward_min": -93.07010464848874, "date": "2025-09-04_18-52-18", "episode_reward_max": 8.00000040304245, "pid": 3651948, "timestamp": 1757004738, "episode_reward_mean": -55.47286553294055, "time_total_s": 9566.642753839493, "episodes_total": 6843, "episode_len_mean": 35.11}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 9600.813539981842, "info": {"sample_time_ms": 33293.166, "num_steps_trained": 302400, "grad_time_ms": 367.724, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 490.7688903808594, "policy_loss": -0.181904137134552, "vf_explained_var": 0.03228212893009186, "entropy": 12.051400184631348, "cur_lr": 4.999999873689376e-05, "total_loss": 490.61029052734375, "kl": 0.015358841978013515}, "load_time_ms": 0.675, "num_steps_sampled": 302400, "update_time_ms": 2.552}, "training_iteration": 252, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.17078614234924, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 302400, "timesteps_total": 302400, "custom_metrics": {}, "iterations_since_restore": 252, "episodes_this_iter": 37, "episode_reward_min": -93.1502064041992, "date": "2025-09-04_18-52-52", "episode_reward_max": 6.00001167450546, "pid": 3651948, "timestamp": 1757004772, "episode_reward_mean": -53.44209251089664, "time_total_s": 9600.813539981842, "episodes_total": 6880, "episode_len_mean": 34.15}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 9634.56543135643, "info": {"sample_time_ms": 33249.286, "num_steps_trained": 303600, "grad_time_ms": 365.85, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 438.65264892578125, "policy_loss": -0.1741490662097931, "vf_explained_var": 0.03822045028209686, "entropy": 11.78211498260498, "cur_lr": 4.999999873689376e-05, "total_loss": 438.5022888183594, "kl": 0.015649745240807533}, "load_time_ms": 0.685, "num_steps_sampled": 303600, "update_time_ms": 2.515}, "training_iteration": 253, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.75189137458801, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 303600, "timesteps_total": 303600, "custom_metrics": {}, "iterations_since_restore": 253, "episodes_this_iter": 34, "episode_reward_min": -93.1502064041992, "date": "2025-09-04_18-53-26", "episode_reward_max": 4.055704940266937, "pid": 3651948, "timestamp": 1757004806, "episode_reward_mean": -53.492828528782695, "time_total_s": 9634.56543135643, "episodes_total": 6914, "episode_len_mean": 34.28}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 9669.068894147873, "info": {"sample_time_ms": 33346.713, "num_steps_trained": 304800, "grad_time_ms": 366.41, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 502.4017028808594, "policy_loss": -0.176845520734787, "vf_explained_var": 0.03880747780203819, "entropy": 11.777851104736328, "cur_lr": 4.999999873689376e-05, "total_loss": 502.2478332519531, "kl": 0.015134657733142376}, "load_time_ms": 0.69, "num_steps_sampled": 304800, "update_time_ms": 2.528}, "training_iteration": 254, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.50346279144287, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 304800, "timesteps_total": 304800, "custom_metrics": {}, "iterations_since_restore": 254, "episodes_this_iter": 36, "episode_reward_min": -93.38752497946227, "date": "2025-09-04_18-54-00", "episode_reward_max": 5.312839912494095, "pid": 3651948, "timestamp": 1757004840, "episode_reward_mean": -52.2676855411649, "time_total_s": 9669.068894147873, "episodes_total": 6950, "episode_len_mean": 33.61}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 9702.425583600998, "info": {"sample_time_ms": 33331.79, "num_steps_trained": 306000, "grad_time_ms": 367.077, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 496.5535583496094, "policy_loss": -0.16783879697322845, "vf_explained_var": 0.017874909564852715, "entropy": 11.913069725036621, "cur_lr": 4.999999873689376e-05, "total_loss": 496.4104309082031, "kl": 0.016264840960502625}, "load_time_ms": 0.682, "num_steps_sampled": 306000, "update_time_ms": 2.473}, "training_iteration": 255, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.356689453125, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 306000, "timesteps_total": 306000, "custom_metrics": {}, "iterations_since_restore": 255, "episodes_this_iter": 35, "episode_reward_min": -93.38752497946227, "date": "2025-09-04_18-54-34", "episode_reward_max": 6.000102246417464, "pid": 3651948, "timestamp": 1757004874, "episode_reward_mean": -52.95659910840507, "time_total_s": 9702.425583600998, "episodes_total": 6985, "episode_len_mean": 33.75}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 9736.113502502441, "info": {"sample_time_ms": 33306.613, "num_steps_trained": 307200, "grad_time_ms": 367.862, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 501.1930847167969, "policy_loss": -0.17009110748767853, "vf_explained_var": 0.03265717998147011, "entropy": 11.818390846252441, "cur_lr": 4.999999873689376e-05, "total_loss": 501.0460510253906, "kl": 0.015193293802440166}, "load_time_ms": 0.682, "num_steps_sampled": 307200, "update_time_ms": 2.441}, "training_iteration": 256, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.68791890144348, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 307200, "timesteps_total": 307200, "custom_metrics": {}, "iterations_since_restore": 256, "episodes_this_iter": 39, "episode_reward_min": -92.67251074277206, "date": "2025-09-04_18-55-07", "episode_reward_max": 6.000109429998887, "pid": 3651948, "timestamp": 1757004907, "episode_reward_mean": -49.49791261277659, "time_total_s": 9736.113502502441, "episodes_total": 7024, "episode_len_mean": 32.13}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 9769.884490966797, "info": {"sample_time_ms": 33307.422, "num_steps_trained": 308400, "grad_time_ms": 367.01, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 498.14794921875, "policy_loss": -0.16582104563713074, "vf_explained_var": 0.024809036403894424, "entropy": 11.592876434326172, "cur_lr": 4.999999873689376e-05, "total_loss": 498.0042419433594, "kl": 0.0145410830155015}, "load_time_ms": 0.668, "num_steps_sampled": 308400, "update_time_ms": 2.452}, "training_iteration": 257, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.77098846435547, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 308400, "timesteps_total": 308400, "custom_metrics": {}, "iterations_since_restore": 257, "episodes_this_iter": 43, "episode_reward_min": -91.53659906537581, "date": "2025-09-04_18-55-41", "episode_reward_max": 6.000109429998887, "pid": 3651948, "timestamp": 1757004941, "episode_reward_mean": -43.73236682884013, "time_total_s": 9769.884490966797, "episodes_total": 7067, "episode_len_mean": 29.5}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 9804.377316713333, "info": {"sample_time_ms": 33442.048, "num_steps_trained": 309600, "grad_time_ms": 368.504, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 474.3194885253906, "policy_loss": -0.17338663339614868, "vf_explained_var": 0.027189724147319794, "entropy": 11.489995956420898, "cur_lr": 4.999999873689376e-05, "total_loss": 474.17138671875, "kl": 0.016618233174085617}, "load_time_ms": 0.673, "num_steps_sampled": 309600, "update_time_ms": 2.499}, "training_iteration": 258, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.492825746536255, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 309600, "timesteps_total": 309600, "custom_metrics": {}, "iterations_since_restore": 258, "episodes_this_iter": 39, "episode_reward_min": -91.35076520477672, "date": "2025-09-04_18-56-16", "episode_reward_max": 4.000352388379444, "pid": 3651948, "timestamp": 1757004976, "episode_reward_mean": -42.154622521712774, "time_total_s": 9804.377316713333, "episodes_total": 7106, "episode_len_mean": 28.72}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 9837.982171058655, "info": {"sample_time_ms": 33437.854, "num_steps_trained": 310800, "grad_time_ms": 370.569, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 441.2422790527344, "policy_loss": -0.1783483773469925, "vf_explained_var": 0.036072149872779846, "entropy": 11.615463256835938, "cur_lr": 4.999999873689376e-05, "total_loss": 441.0879211425781, "kl": 0.015805954113602638}, "load_time_ms": 0.67, "num_steps_sampled": 310800, "update_time_ms": 2.501}, "training_iteration": 259, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.604854345321655, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 310800, "timesteps_total": 310800, "custom_metrics": {}, "iterations_since_restore": 259, "episodes_this_iter": 34, "episode_reward_min": -91.35076520477672, "date": "2025-09-04_18-56-49", "episode_reward_max": 2.0005359728741396, "pid": 3651948, "timestamp": 1757005009, "episode_reward_mean": -48.61172280956397, "time_total_s": 9837.982171058655, "episodes_total": 7140, "episode_len_mean": 31.96}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 9871.78459239006, "info": {"sample_time_ms": 33457.814, "num_steps_trained": 312000, "grad_time_ms": 372.539, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 456.3844299316406, "policy_loss": -0.18062280118465424, "vf_explained_var": 0.0369785837829113, "entropy": 11.31128978729248, "cur_lr": 4.999999873689376e-05, "total_loss": 456.2292175292969, "kl": 0.016710573807358742}, "load_time_ms": 0.672, "num_steps_sampled": 312000, "update_time_ms": 2.575}, "training_iteration": 260, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.80242133140564, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 312000, "timesteps_total": 312000, "custom_metrics": {}, "iterations_since_restore": 260, "episodes_this_iter": 36, "episode_reward_min": -92.4291552854384, "date": "2025-09-04_18-57-23", "episode_reward_max": 6.000080980608969, "pid": 3651948, "timestamp": 1757005043, "episode_reward_mean": -52.84823083865218, "time_total_s": 9871.78459239006, "episodes_total": 7176, "episode_len_mean": 34.19}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 9905.303673744202, "info": {"sample_time_ms": 33486.184, "num_steps_trained": 313200, "grad_time_ms": 372.131, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 445.8668212890625, "policy_loss": -0.17220252752304077, "vf_explained_var": 0.027600638568401337, "entropy": 11.534998893737793, "cur_lr": 4.999999873689376e-05, "total_loss": 445.7190856933594, "kl": 0.016097839921712875}, "load_time_ms": 0.668, "num_steps_sampled": 313200, "update_time_ms": 2.504}, "training_iteration": 261, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.519081354141235, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 313200, "timesteps_total": 313200, "custom_metrics": {}, "iterations_since_restore": 261, "episodes_this_iter": 32, "episode_reward_min": -92.4291552854384, "date": "2025-09-04_18-57-57", "episode_reward_max": 6.000080980608969, "pid": 3651948, "timestamp": 1757005077, "episode_reward_mean": -55.41658854010331, "time_total_s": 9905.303673744202, "episodes_total": 7208, "episode_len_mean": 35.25}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 9939.227750062943, "info": {"sample_time_ms": 33460.782, "num_steps_trained": 314400, "grad_time_ms": 372.764, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 454.6693115234375, "policy_loss": -0.16877640783786774, "vf_explained_var": 0.017379429191350937, "entropy": 11.471319198608398, "cur_lr": 4.999999873689376e-05, "total_loss": 454.52459716796875, "kl": 0.015870148316025734}, "load_time_ms": 0.678, "num_steps_sampled": 314400, "update_time_ms": 2.522}, "training_iteration": 262, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.924076318740845, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 314400, "timesteps_total": 314400, "custom_metrics": {}, "iterations_since_restore": 262, "episodes_this_iter": 33, "episode_reward_min": -92.4291552854384, "date": "2025-09-04_18-58-31", "episode_reward_max": 6.000086958096147, "pid": 3651948, "timestamp": 1757005111, "episode_reward_mean": -56.424942778632506, "time_total_s": 9939.227750062943, "episodes_total": 7241, "episode_len_mean": 35.48}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 9973.117554426193, "info": {"sample_time_ms": 33472.645, "num_steps_trained": 315600, "grad_time_ms": 374.627, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 514.4802856445312, "policy_loss": -0.16580967605113983, "vf_explained_var": 0.030511697754263878, "entropy": 11.81280517578125, "cur_lr": 4.999999873689376e-05, "total_loss": 514.3388061523438, "kl": 0.015987424179911613}, "load_time_ms": 0.672, "num_steps_sampled": 315600, "update_time_ms": 2.558}, "training_iteration": 263, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.88980436325073, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 315600, "timesteps_total": 315600, "custom_metrics": {}, "iterations_since_restore": 263, "episodes_this_iter": 36, "episode_reward_min": -91.77151161971379, "date": "2025-09-04_18-59-05", "episode_reward_max": 6.000086958096147, "pid": 3651948, "timestamp": 1757005145, "episode_reward_mean": -55.9312512364152, "time_total_s": 9973.117554426193, "episodes_total": 7277, "episode_len_mean": 35.37}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 10007.813853263855, "info": {"sample_time_ms": 33491.385, "num_steps_trained": 316800, "grad_time_ms": 375.175, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 447.14453125, "policy_loss": -0.1734510362148285, "vf_explained_var": 0.02611129730939865, "entropy": 10.773795127868652, "cur_lr": 4.999999873689376e-05, "total_loss": 446.99371337890625, "kl": 0.014916815795004368}, "load_time_ms": 0.664, "num_steps_sampled": 316800, "update_time_ms": 2.58}, "training_iteration": 264, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": "Zhenxin_S_FC", 
"lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.69629883766174, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 316800, "timesteps_total": 316800, "custom_metrics": {}, "iterations_since_restore": 264, "episodes_this_iter": 42, "episode_reward_min": -91.53913098515123, "date": "2025-09-04_18-59-39", "episode_reward_max": 6.000575166421358, "pid": 3651948, "timestamp": 1757005179, "episode_reward_mean": -49.16608473715471, "time_total_s": 10007.813853263855, "episodes_total": 7319, "episode_len_mean": 32.08}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 10040.971177101135, "info": {"sample_time_ms": 33473.134, "num_steps_trained": 318000, "grad_time_ms": 373.482, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 522.8036499023438, "policy_loss": -0.16708451509475708, "vf_explained_var": 0.020491890609264374, "entropy": 11.706774711608887, "cur_lr": 4.999999873689376e-05, "total_loss": 522.6589965820312, "kl": 0.014792154543101788}, "load_time_ms": 0.664, "num_steps_sampled": 318000, "update_time_ms": 2.586}, "training_iteration": 265, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.15732383728027, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 318000, "timesteps_total": 318000, "custom_metrics": {}, "iterations_since_restore": 265, "episodes_this_iter": 37, "episode_reward_min": -91.0761048981628, "date": "2025-09-04_19-00-13", "episode_reward_max": 6.000575166421358, "pid": 3651948, "timestamp": 1757005213, "episode_reward_mean": -47.178333058434504, "time_total_s": 10040.971177101135, "episodes_total": 7356, "episode_len_mean": 31.15}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 10074.561195135117, "info": {"sample_time_ms": 33464.165, "num_steps_trained": 319200, "grad_time_ms": 372.685, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 446.42901611328125, "policy_loss": -0.16864469647407532, "vf_explained_var": 0.020345423370599747, "entropy": 11.407777786254883, "cur_lr": 4.999999873689376e-05, "total_loss": 446.2832946777344, "kl": 0.015111408196389675}, "load_time_ms": 0.663, "num_steps_sampled": 319200, "update_time_ms": 2.599}, "training_iteration": 266, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.59001803398132, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 319200, "timesteps_total": 319200, "custom_metrics": {}, "iterations_since_restore": 266, "episodes_this_iter": 35, "episode_reward_min": -92.17183099341096, "date": "2025-09-04_19-00-46", "episode_reward_max": 8.000000403929597, "pid": 3651948, "timestamp": 1757005246, "episode_reward_mean": -50.03188182993689, "time_total_s": 10074.561195135117, "episodes_total": 7391, "episode_len_mean": 32.47}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 10108.989178180695, "info": {"sample_time_ms": 33529.985, "num_steps_trained": 320400, "grad_time_ms": 372.617, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 521.6529541015625, "policy_loss": -0.17513184249401093, "vf_explained_var": 0.023661097511649132, "entropy": 11.008745193481445, "cur_lr": 4.999999873689376e-05, "total_loss": 521.501953125, "kl": 0.015934422612190247}, "load_time_ms": 0.675, "num_steps_sampled": 320400, "update_time_ms": 2.589}, "training_iteration": 267, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.427983045578, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 320400, "timesteps_total": 320400, "custom_metrics": {}, "iterations_since_restore": 267, "episodes_this_iter": 42, "episode_reward_min": -92.17183099341096, "date": "2025-09-04_19-01-21", "episode_reward_max": 8.000003200551006, "pid": 3651948, "timestamp": 1757005281, "episode_reward_mean": -47.14156567128666, "time_total_s": 10108.989178180695, "episodes_total": 7433, "episode_len_mean": 31.2}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 10143.616182804108, "info": {"sample_time_ms": 33542.544, "num_steps_trained": 321600, "grad_time_ms": 373.486, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 496.2048034667969, "policy_loss": -0.1643340140581131, "vf_explained_var": 0.01232109498232603, "entropy": 11.45879077911377, "cur_lr": 4.999999873689376e-05, "total_loss": 496.0647277832031, "kl": 0.015981314703822136}, "load_time_ms": 0.699, "num_steps_sampled": 321600, "update_time_ms": 2.566}, "training_iteration": 268, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.627004623413086, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 321600, "timesteps_total": 321600, "custom_metrics": {}, "iterations_since_restore": 268, "episodes_this_iter": 42, "episode_reward_min": -93.49556735773535, "date": "2025-09-04_19-01-55", "episode_reward_max": 8.000003200551006, "pid": 3651948, "timestamp": 1757005315, "episode_reward_mean": -42.24551633872482, "time_total_s": 10143.616182804108, "episodes_total": 7475, "episode_len_mean": 28.59}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 10177.26104593277, "info": {"sample_time_ms": 33547.64, "num_steps_trained": 322800, "grad_time_ms": 372.376, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 473.0905456542969, "policy_loss": -0.18091005086898804, "vf_explained_var": 0.03324628621339798, "entropy": 11.576276779174805, "cur_lr": 4.999999873689376e-05, "total_loss": 472.9339599609375, "kl": 0.015997041016817093}, "load_time_ms": 0.696, "num_steps_sampled": 322800, "update_time_ms": 2.573}, "training_iteration": 269, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.64486312866211, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 322800, "timesteps_total": 322800, "custom_metrics": {}, "iterations_since_restore": 269, "episodes_this_iter": 45, "episode_reward_min": -93.49556735773535, "date": "2025-09-04_19-02-29", "episode_reward_max": 6.000151534633431, "pid": 3651948, "timestamp": 1757005349, "episode_reward_mean": -41.814555740237004, "time_total_s": 10177.26104593277, "episodes_total": 7520, "episode_len_mean": 28.3}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 10212.144480705261, "info": {"sample_time_ms": 33656.169, "num_steps_trained": 324000, "grad_time_ms": 371.946, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 484.0221862792969, "policy_loss": -0.16421444714069366, "vf_explained_var": 0.01682865619659424, "entropy": 11.210699081420898, "cur_lr": 4.999999873689376e-05, "total_loss": 483.8819580078125, "kl": 0.015759721398353577}, "load_time_ms": 0.692, "num_steps_sampled": 324000, "update_time_ms": 2.53}, "training_iteration": 270, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.883434772491455, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 324000, "timesteps_total": 324000, "custom_metrics": {}, "iterations_since_restore": 270, "episodes_this_iter": 43, "episode_reward_min": -93.49556735773535, "date": "2025-09-04_19-03-04", "episode_reward_max": 6.000151534633431, "pid": 3651948, "timestamp": 1757005384, "episode_reward_mean": -42.90693813406929, "time_total_s": 10212.144480705261, "episodes_total": 7563, "episode_len_mean": 28.76}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 10245.876401901245, "info": {"sample_time_ms": 33679.183, "num_steps_trained": 325200, "grad_time_ms": 370.133, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 479.9202880859375, "policy_loss": -0.1779698133468628, "vf_explained_var": 0.023097369819879532, "entropy": 10.930511474609375, "cur_lr": 4.999999873689376e-05, "total_loss": 479.7669372558594, "kl": 0.016181154176592827}, "load_time_ms": 0.695, "num_steps_sampled": 325200, "update_time_ms": 2.613}, "training_iteration": 271, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.73192119598389, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 325200, "timesteps_total": 325200, "custom_metrics": {}, "iterations_since_restore": 271, "episodes_this_iter": 36, "episode_reward_min": -93.68366487961454, "date": "2025-09-04_19-03-38", "episode_reward_max": 4.000525533646388, "pid": 3651948, "timestamp": 1757005418, "episode_reward_mean": -42.65263693139008, "time_total_s": 10245.876401901245, "episodes_total": 7599, "episode_len_mean": 28.76}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 10279.449191570282, "info": {"sample_time_ms": 33644.665, "num_steps_trained": 326400, "grad_time_ms": 369.57, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 484.3214111328125, "policy_loss": -0.16890767216682434, "vf_explained_var": 0.033569660037755966, "entropy": 11.223655700683594, "cur_lr": 4.999999873689376e-05, "total_loss": 484.1768798828125, "kl": 0.016036422923207283}, "load_time_ms": 0.693, "num_steps_sampled": 326400, "update_time_ms": 2.591}, "training_iteration": 272, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.572789669036865, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 326400, "timesteps_total": 326400, "custom_metrics": {}, "iterations_since_restore": 272, "episodes_this_iter": 41, "episode_reward_min": -93.68366487961454, "date": "2025-09-04_19-04-11", "episode_reward_max": 8.000000473594405, "pid": 3651948, "timestamp": 1757005451, "episode_reward_mean": -45.00275549680281, "time_total_s": 10279.449191570282, "episodes_total": 7640, "episode_len_mean": 30.04}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 10313.695538282394, "info": {"sample_time_ms": 33680.507, "num_steps_trained": 327600, "grad_time_ms": 369.417, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 427.5572509765625, "policy_loss": -0.16799139976501465, "vf_explained_var": 0.02995210886001587, "entropy": 11.145977020263672, "cur_lr": 4.999999873689376e-05, "total_loss": 427.41265869140625, "kl": 0.01543845422565937}, "load_time_ms": 0.715, "num_steps_sampled": 327600, "update_time_ms": 2.579}, "training_iteration": 273, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.24634671211243, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 327600, "timesteps_total": 327600, "custom_metrics": {}, "iterations_since_restore": 273, "episodes_this_iter": 40, "episode_reward_min": -93.68366487961454, "date": "2025-09-04_19-04-45", "episode_reward_max": 8.000000473594405, "pid": 3651948, "timestamp": 1757005485, "episode_reward_mean": -46.60512579057071, "time_total_s": 10313.695538282394, "episodes_total": 7680, "episode_len_mean": 30.86}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 10347.263674736023, "info": {"sample_time_ms": 33567.666, "num_steps_trained": 328800, "grad_time_ms": 369.409, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 502.9129333496094, "policy_loss": -0.17614038288593292, "vf_explained_var": 0.022216200828552246, "entropy": 11.216800689697266, "cur_lr": 4.999999873689376e-05, "total_loss": 502.7601013183594, "kl": 0.015363307669758797}, "load_time_ms": 0.727, "num_steps_sampled": 328800, "update_time_ms": 2.56}, "training_iteration": 274, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.56813645362854, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 328800, "timesteps_total": 328800, "custom_metrics": {}, "iterations_since_restore": 274, "episodes_this_iter": 38, "episode_reward_min": -91.98634031365813, "date": "2025-09-04_19-05-19", "episode_reward_max": 8.000225089274451, "pid": 3651948, "timestamp": 1757005519, "episode_reward_mean": -43.859947842870355, "time_total_s": 10347.263674736023, "episodes_total": 7718, "episode_len_mean": 29.61}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 10381.212057828903, "info": {"sample_time_ms": 33646.788, "num_steps_trained": 330000, "grad_time_ms": 369.366, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 453.77984619140625, "policy_loss": -0.16606299579143524, "vf_explained_var": 0.03338143602013588, "entropy": 10.97883415222168, "cur_lr": 4.999999873689376e-05, "total_loss": 453.6378479003906, "kl": 0.015875400975346565}, "load_time_ms": 0.726, "num_steps_sampled": 330000, "update_time_ms": 2.615}, "training_iteration": 275, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.94838309288025, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 330000, "timesteps_total": 330000, "custom_metrics": {}, "iterations_since_restore": 275, "episodes_this_iter": 39, "episode_reward_min": -91.98634031365813, "date": "2025-09-04_19-05-53", "episode_reward_max": 8.000225089274451, "pid": 3651948, "timestamp": 1757005553, "episode_reward_mean": -46.33955160056749, "time_total_s": 10381.212057828903, "episodes_total": 7757, "episode_len_mean": 30.69}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 10415.166213274002, "info": {"sample_time_ms": 33680.273, "num_steps_trained": 331200, "grad_time_ms": 372.268, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 450.1102294921875, "policy_loss": -0.17988114058971405, "vf_explained_var": 0.025934258475899696, "entropy": 11.465625762939453, "cur_lr": 4.999999873689376e-05, "total_loss": 449.95477294921875, "kl": 0.016104480251669884}, "load_time_ms": 0.742, "num_steps_sampled": 331200, "update_time_ms": 2.587}, "training_iteration": 276, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.95415544509888, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 331200, "timesteps_total": 331200, "custom_metrics": {}, "iterations_since_restore": 276, "episodes_this_iter": 37, "episode_reward_min": -90.53218214614839, "date": "2025-09-04_19-06-27", "episode_reward_max": 8.000225089274451, "pid": 3651948, "timestamp": 1757005587, "episode_reward_mean": -49.45591814832428, "time_total_s": 10415.166213274002, "episodes_total": 7794, "episode_len_mean": 32.37}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 10450.337350845337, "info": {"sample_time_ms": 33754.495, "num_steps_trained": 332400, "grad_time_ms": 372.366, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 469.13031005859375, "policy_loss": -0.16875097155570984, "vf_explained_var": 0.023109469562768936, "entropy": 11.081283569335938, "cur_lr": 4.999999873689376e-05, "total_loss": 468.9852294921875, "kl": 0.015559237450361252}, "load_time_ms": 0.73, "num_steps_sampled": 332400, "update_time_ms": 2.592}, "training_iteration": 277, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 35.17113757133484, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 332400, "timesteps_total": 332400, "custom_metrics": {}, "iterations_since_restore": 277, "episodes_this_iter": 43, "episode_reward_min": -90.53218214614839, "date": "2025-09-04_19-07-02", "episode_reward_max": 8.000071739314354, "pid": 3651948, "timestamp": 1757005622, "episode_reward_mean": -44.07394949871933, "time_total_s": 10450.337350845337, "episodes_total": 7837, "episode_len_mean": 29.69}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 10484.945219278336, "info": {"sample_time_ms": 33754.833, "num_steps_trained": 333600, "grad_time_ms": 370.167, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 543.11328125, "policy_loss": -0.17992782592773438, "vf_explained_var": 0.018156178295612335, "entropy": 10.869085311889648, "cur_lr": 4.999999873689376e-05, "total_loss": 542.9584350585938, "kl": 0.01653093658387661}, "load_time_ms": 0.705, "num_steps_sampled": 333600, "update_time_ms": 2.579}, "training_iteration": 278, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.60786843299866, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 333600, "timesteps_total": 333600, "custom_metrics": {}, "iterations_since_restore": 278, "episodes_this_iter": 41, "episode_reward_min": -91.15067117704314, "date": "2025-09-04_19-07-37", "episode_reward_max": 8.000071739314354, "pid": 3651948, "timestamp": 1757005657, "episode_reward_mean": -42.293963324320195, "time_total_s": 10484.945219278336, "episodes_total": 7878, "episode_len_mean": 28.67}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 10518.67206120491, "info": {"sample_time_ms": 33760.475, "num_steps_trained": 334800, "grad_time_ms": 372.736, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 473.5882263183594, "policy_loss": -0.17750756442546844, "vf_explained_var": 0.03091849945485592, "entropy": 11.348112106323242, "cur_lr": 4.999999873689376e-05, "total_loss": 473.4354248046875, "kl": 0.01630318909883499}, "load_time_ms": 0.713, "num_steps_sampled": 334800, "update_time_ms": 2.583}, "training_iteration": 279, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.72684192657471, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 334800, "timesteps_total": 334800, "custom_metrics": {}, "iterations_since_restore": 279, "episodes_this_iter": 41, "episode_reward_min": -91.15067117704314, "date": "2025-09-04_19-08-11", "episode_reward_max": 8.000004903249033, "pid": 3651948, "timestamp": 1757005691, "episode_reward_mean": -44.9513265938712, "time_total_s": 10518.67206120491, "episodes_total": 7919, "episode_len_mean": 29.95}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 10552.616226434708, "info": {"sample_time_ms": 33665.889, "num_steps_trained": 336000, "grad_time_ms": 373.425, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 491.8249816894531, "policy_loss": -0.17976070940494537, "vf_explained_var": 0.02973158471286297, "entropy": 10.85261058807373, "cur_lr": 4.999999873689376e-05, "total_loss": 491.6720886230469, "kl": 0.017699040472507477}, "load_time_ms": 0.718, "num_steps_sampled": 336000, "update_time_ms": 2.594}, "training_iteration": 280, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.94416522979736, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 336000, "timesteps_total": 336000, "custom_metrics": {}, "iterations_since_restore": 280, "episodes_this_iter": 38, "episode_reward_min": -95.3722150015734, "date": "2025-09-04_19-08-44", "episode_reward_max": 8.000004903249033, "pid": 3651948, "timestamp": 1757005724, "episode_reward_mean": -44.50705614322201, "time_total_s": 10552.616226434708, "episodes_total": 7957, "episode_len_mean": 29.95}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 10586.572783470154, "info": {"sample_time_ms": 33685.632, "num_steps_trained": 337200, "grad_time_ms": 376.163, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 488.8636169433594, "policy_loss": -0.1731945276260376, "vf_explained_var": 0.04734458401799202, "entropy": 11.287887573242188, "cur_lr": 4.999999873689376e-05, "total_loss": 488.71331787109375, "kl": 0.015083376318216324}, "load_time_ms": 0.716, "num_steps_sampled": 337200, "update_time_ms": 2.562}, "training_iteration": 281, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.95655703544617, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 337200, "timesteps_total": 337200, "custom_metrics": {}, "iterations_since_restore": 281, "episodes_this_iter": 36, "episode_reward_min": -95.3722150015734, "date": "2025-09-04_19-09-19", "episode_reward_max": 6.000347435424667, "pid": 3651948, "timestamp": 1757005759, "episode_reward_mean": -48.719626192050356, "time_total_s": 10586.572783470154, "episodes_total": 7993, "episode_len_mean": 32.19}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 10620.745208978653, "info": {"sample_time_ms": 33745.513, "num_steps_trained": 338400, "grad_time_ms": 376.235, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 491.0636901855469, "policy_loss": -0.17348407208919525, "vf_explained_var": 0.02385639399290085, "entropy": 10.733929634094238, "cur_lr": 4.999999873689376e-05, "total_loss": 490.9172668457031, "kl": 0.017822520807385445}, "load_time_ms": 0.711, "num_steps_sampled": 338400, "update_time_ms": 2.588}, "training_iteration": 282, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.172425508499146, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 338400, "timesteps_total": 338400, "custom_metrics": {}, "iterations_since_restore": 282, "episodes_this_iter": 45, "episode_reward_min": -91.86570982637153, "date": "2025-09-04_19-09-53", "episode_reward_max": 6.001391594613905, "pid": 3651948, "timestamp": 1757005793, "episode_reward_mean": -44.18720843152826, "time_total_s": 10620.745208978653, "episodes_total": 8038, "episode_len_mean": 29.89}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 10654.505257368088, "info": {"sample_time_ms": 33698.56, "num_steps_trained": 339600, "grad_time_ms": 374.566, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 443.69537353515625, "policy_loss": -0.17414651811122894, "vf_explained_var": 0.032790109515190125, "entropy": 11.28775691986084, "cur_lr": 4.999999873689376e-05, "total_loss": 443.5450744628906, "kl": 0.015703819692134857}, "load_time_ms": 0.684, "num_steps_sampled": 339600, "update_time_ms": 2.592}, "training_iteration": 283, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.760048389434814, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 339600, "timesteps_total": 339600, "custom_metrics": {}, "iterations_since_restore": 283, "episodes_this_iter": 39, "episode_reward_min": -91.8255611391432, "date": "2025-09-04_19-10-26", "episode_reward_max": 6.001391594613905, "pid": 3651948, "timestamp": 1757005826, "episode_reward_mean": -43.44958326804987, "time_total_s": 10654.505257368088, "episodes_total": 8077, "episode_len_mean": 29.28}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 10688.552230834961, "info": {"sample_time_ms": 33748.997, "num_steps_trained": 340800, "grad_time_ms": 372.023, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 443.31854248046875, "policy_loss": -0.18360702693462372, "vf_explained_var": 0.026069827377796173, "entropy": 10.978754997253418, "cur_lr": 4.999999873689376e-05, "total_loss": 443.1598815917969, "kl": 0.01643957756459713}, "load_time_ms": 0.672, "num_steps_sampled": 340800, "update_time_ms": 2.593}, "training_iteration": 284, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.04697346687317, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 340800, "timesteps_total": 340800, "custom_metrics": {}, "iterations_since_restore": 284, "episodes_this_iter": 38, "episode_reward_min": -91.8255611391432, "date": "2025-09-04_19-11-01", "episode_reward_max": 8.00000043651723, "pid": 3651948, "timestamp": 1757005861, "episode_reward_mean": -47.55785499302104, "time_total_s": 10688.552230834961, "episodes_total": 8115, "episode_len_mean": 31.36}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 10721.957757472992, "info": {"sample_time_ms": 33694.925, "num_steps_trained": 342000, "grad_time_ms": 371.863, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 499.5901184082031, "policy_loss": -0.17610689997673035, "vf_explained_var": 0.019171714782714844, "entropy": 10.866064071655273, "cur_lr": 4.999999873689376e-05, "total_loss": 499.4377746582031, "kl": 0.015659630298614502}, "load_time_ms": 0.668, "num_steps_sampled": 342000, "update_time_ms": 2.537}, "training_iteration": 285, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.405526638031006, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 342000, "timesteps_total": 342000, "custom_metrics": {}, "iterations_since_restore": 285, "episodes_this_iter": 40, "episode_reward_min": -91.22999134012633, "date": "2025-09-04_19-11-34", "episode_reward_max": 8.000052252170565, "pid": 3651948, "timestamp": 1757005894, "episode_reward_mean": -45.63354421876618, "time_total_s": 10721.957757472992, "episodes_total": 8155, "episode_len_mean": 30.29}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 10756.137785673141, "info": {"sample_time_ms": 33718.46, "num_steps_trained": 343200, "grad_time_ms": 370.937, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 465.80670166015625, "policy_loss": -0.17610350251197815, "vf_explained_var": 0.034203190356492996, "entropy": 10.926987648010254, "cur_lr": 4.999999873689376e-05, "total_loss": 465.6551208496094, "kl": 0.016123224049806595}, "load_time_ms": 0.654, "num_steps_sampled": 343200, "update_time_ms": 2.537}, "training_iteration": 286, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.180028200149536, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 343200, "timesteps_total": 343200, "custom_metrics": {}, "iterations_since_restore": 286, "episodes_this_iter": 41, "episode_reward_min": -91.22999134012633, "date": "2025-09-04_19-12-08", "episode_reward_max": 8.000052252170565, "pid": 3651948, "timestamp": 1757005928, "episode_reward_mean": -44.48674554696858, "time_total_s": 10756.137785673141, "episodes_total": 8196, "episode_len_mean": 29.72}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 10790.090457201004, "info": {"sample_time_ms": 33595.784, "num_steps_trained": 344400, "grad_time_ms": 371.739, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 509.7332763671875, "policy_loss": -0.18097235262393951, "vf_explained_var": 0.026486733928322792, "entropy": 11.022765159606934, "cur_lr": 4.999999873689376e-05, "total_loss": 509.57720947265625, "kl": 0.016459709033370018}, "load_time_ms": 0.657, "num_steps_sampled": 344400, "update_time_ms": 2.535}, "training_iteration": 287, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.95267152786255, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 344400, "timesteps_total": 344400, "custom_metrics": {}, "iterations_since_restore": 287, "episodes_this_iter": 39, "episode_reward_min": -90.82597945393007, "date": "2025-09-04_19-12-42", "episode_reward_max": 6.000163794206194, "pid": 3651948, "timestamp": 1757005962, "episode_reward_mean": -43.89554236020127, "time_total_s": 10790.090457201004, "episodes_total": 8235, "episode_len_mean": 29.7}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 10824.339283704758, "info": {"sample_time_ms": 33558.228, "num_steps_trained": 345600, "grad_time_ms": 373.358, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 479.1529846191406, "policy_loss": -0.17926204204559326, "vf_explained_var": 0.021100951358675957, "entropy": 11.003413200378418, "cur_lr": 4.999999873689376e-05, "total_loss": 478.9981689453125, "kl": 0.016063014045357704}, "load_time_ms": 0.66, "num_steps_sampled": 345600, "update_time_ms": 2.549}, "training_iteration": 288, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.24882650375366, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 345600, "timesteps_total": 345600, "custom_metrics": {}, "iterations_since_restore": 288, "episodes_this_iter": 47, "episode_reward_min": -90.82597945393007, "date": "2025-09-04_19-13-16", "episode_reward_max": 8.000000787655095, "pid": 3651948, "timestamp": 1757005996, "episode_reward_mean": -42.089675947863626, "time_total_s": 10824.339283704758, "episodes_total": 8282, "episode_len_mean": 28.73}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 10857.90751862526, "info": {"sample_time_ms": 33545.668, "num_steps_trained": 346800, "grad_time_ms": 370.082, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 489.45941162109375, "policy_loss": -0.16726961731910706, "vf_explained_var": 0.021498076617717743, "entropy": 11.020486831665039, "cur_lr": 4.999999873689376e-05, "total_loss": 489.3161315917969, "kl": 0.015811540186405182}, "load_time_ms": 0.656, "num_steps_sampled": 346800, "update_time_ms": 2.557}, "training_iteration": 289, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.56823492050171, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 346800, "timesteps_total": 346800, "custom_metrics": {}, "iterations_since_restore": 289, "episodes_this_iter": 44, "episode_reward_min": -92.1446320532424, "date": "2025-09-04_19-13-50", "episode_reward_max": 8.000000787655095, "pid": 3651948, "timestamp": 1757006030, "episode_reward_mean": -38.038023044437594, "time_total_s": 10857.90751862526, "episodes_total": 8326, "episode_len_mean": 26.54}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 10891.880641222, "info": {"sample_time_ms": 33547.73, "num_steps_trained": 348000, "grad_time_ms": 370.899, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 477.1534423828125, "policy_loss": -0.18578127026557922, "vf_explained_var": 0.020566217601299286, "entropy": 11.158267974853516, "cur_lr": 4.999999873689376e-05, "total_loss": 476.99273681640625, "kl": 0.016526976600289345}, "load_time_ms": 0.674, "num_steps_sampled": 348000, "update_time_ms": 2.548}, "training_iteration": 290, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.97312259674072, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 348000, "timesteps_total": 348000, "custom_metrics": {}, "iterations_since_restore": 290, "episodes_this_iter": 40, "episode_reward_min": -92.1446320532424, "date": "2025-09-04_19-14-24", "episode_reward_max": 6.00023374893414, "pid": 3651948, "timestamp": 1757006064, "episode_reward_mean": -43.65306031492569, "time_total_s": 10891.880641222, "episodes_total": 8366, "episode_len_mean": 29.34}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 10927.529315710068, "info": {"sample_time_ms": 33717.148, "num_steps_trained": 349200, "grad_time_ms": 370.706, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 486.17718505859375, "policy_loss": -0.16736049950122833, "vf_explained_var": 0.017367621883749962, "entropy": 11.131400108337402, "cur_lr": 4.999999873689376e-05, "total_loss": 486.0347900390625, "kl": 0.01641010493040085}, "load_time_ms": 0.685, "num_steps_sampled": 349200, "update_time_ms": 2.533}, "training_iteration": 291, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 35.64867448806763, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 349200, "timesteps_total": 349200, "custom_metrics": {}, "iterations_since_restore": 291, "episodes_this_iter": 41, "episode_reward_min": -92.21221438592339, "date": "2025-09-04_19-15-00", "episode_reward_max": 8.000000400008378, "pid": 3651948, "timestamp": 1757006100, "episode_reward_mean": -43.812856947642615, "time_total_s": 10927.529315710068, "episodes_total": 8407, "episode_len_mean": 29.49}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 10963.153591632843, "info": {"sample_time_ms": 33864.213, "num_steps_trained": 350400, "grad_time_ms": 368.848, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 459.4781799316406, "policy_loss": -0.17313633859157562, "vf_explained_var": 0.01630322076380253, "entropy": 10.939611434936523, "cur_lr": 4.999999873689376e-05, "total_loss": 459.33050537109375, "kl": 0.016762765124440193}, "load_time_ms": 0.687, "num_steps_sampled": 350400, "update_time_ms": 2.517}, "training_iteration": 292, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 35.62427592277527, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 350400, "timesteps_total": 350400, "custom_metrics": {}, "iterations_since_restore": 292, "episodes_this_iter": 41, "episode_reward_min": -92.72521688390759, "date": "2025-09-04_19-15-35", "episode_reward_max": 8.000000400008378, "pid": 3651948, "timestamp": 1757006135, "episode_reward_mean": -44.099940482126186, "time_total_s": 10963.153591632843, "episodes_total": 8448, "episode_len_mean": 29.8}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 10997.174255371094, "info": {"sample_time_ms": 33888.988, "num_steps_trained": 351600, "grad_time_ms": 370.137, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 427.449462890625, "policy_loss": -0.1794745922088623, "vf_explained_var": 0.028868675231933594, "entropy": 11.113698959350586, "cur_lr": 4.999999873689376e-05, "total_loss": 427.29345703125, "kl": 0.015439452603459358}, "load_time_ms": 0.692, "num_steps_sampled": 351600, "update_time_ms": 2.493}, "training_iteration": 293, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.02066373825073, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 351600, "timesteps_total": 351600, "custom_metrics": {}, "iterations_since_restore": 293, "episodes_this_iter": 43, "episode_reward_min": -92.72521688390759, "date": "2025-09-04_19-16-09", "episode_reward_max": 8.000000414224198, "pid": 3651948, "timestamp": 1757006169, "episode_reward_mean": -42.17512389247495, "time_total_s": 10997.174255371094, "episodes_total": 8491, "episode_len_mean": 29.01}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 11031.405236959457, "info": {"sample_time_ms": 33907.047, "num_steps_trained": 352800, "grad_time_ms": 370.504, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 510.30328369140625, "policy_loss": -0.18082945048809052, "vf_explained_var": 0.014124538749456406, "entropy": 11.173457145690918, "cur_lr": 4.999999873689376e-05, "total_loss": 510.14752197265625, "kl": 0.01647772826254368}, "load_time_ms": 0.692, "num_steps_sampled": 352800, "update_time_ms": 2.501}, "training_iteration": 294, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.23098158836365, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 352800, "timesteps_total": 352800, "custom_metrics": {}, "iterations_since_restore": 294, "episodes_this_iter": 39, "episode_reward_min": -92.72521688390759, "date": "2025-09-04_19-16-44", "episode_reward_max": 8.000000414224198, "pid": 3651948, "timestamp": 1757006204, "episode_reward_mean": -40.4530542152126, "time_total_s": 11031.405236959457, "episodes_total": 8530, "episode_len_mean": 27.98}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 11065.227889537811, "info": {"sample_time_ms": 33947.023, "num_steps_trained": 354000, "grad_time_ms": 372.241, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 533.9700927734375, "policy_loss": -0.18131543695926666, "vf_explained_var": 0.018143661320209503, "entropy": 10.805140495300293, "cur_lr": 4.999999873689376e-05, "total_loss": 533.8145751953125, "kl": 0.016999023035168648}, "load_time_ms": 0.697, "num_steps_sampled": 354000, "update_time_ms": 2.498}, "training_iteration": 295, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.82265257835388, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 354000, "timesteps_total": 354000, "custom_metrics": {}, "iterations_since_restore": 295, "episodes_this_iter": 38, "episode_reward_min": -92.81093397258293, "date": "2025-09-04_19-17-17", "episode_reward_max": 6.000249367189705, "pid": 3651948, "timestamp": 1757006237, "episode_reward_mean": -46.47372406120076, "time_total_s": 11065.227889537811, "episodes_total": 8568, "episode_len_mean": 30.75}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 11100.164932012558, "info": {"sample_time_ms": 34022.613, "num_steps_trained": 355200, "grad_time_ms": 372.335, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 457.92620849609375, "policy_loss": -0.16722075641155243, "vf_explained_var": 0.03289921581745148, "entropy": 11.11696720123291, "cur_lr": 4.999999873689376e-05, "total_loss": 457.78167724609375, "kl": 0.014939261600375175}, "load_time_ms": 0.703, "num_steps_sampled": 355200, "update_time_ms": 2.498}, "training_iteration": 296, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.937042474746704, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 355200, "timesteps_total": 355200, "custom_metrics": {}, "iterations_since_restore": 296, "episodes_this_iter": 43, "episode_reward_min": -92.81093397258293, "date": "2025-09-04_19-17-52", "episode_reward_max": 8.000185893064492, "pid": 3651948, "timestamp": 1757006272, "episode_reward_mean": -45.086113935137554, "time_total_s": 11100.164932012558, "episodes_total": 8611, "episode_len_mean": 29.87}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 11134.67031955719, "info": {"sample_time_ms": 34077.17, "num_steps_trained": 356400, "grad_time_ms": 373.02, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 457.5201721191406, "policy_loss": -0.16585397720336914, "vf_explained_var": 0.01322248950600624, "entropy": 10.969801902770996, "cur_lr": 4.999999873689376e-05, "total_loss": 457.3786926269531, "kl": 0.016034726053476334}, "load_time_ms": 0.707, "num_steps_sampled": 356400, "update_time_ms": 2.507}, "training_iteration": 297, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.50538754463196, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 356400, "timesteps_total": 356400, "custom_metrics": {}, "iterations_since_restore": 297, "episodes_this_iter": 44, "episode_reward_min": -91.00477448244305, "date": "2025-09-04_19-18-27", "episode_reward_max": 8.000185893064492, "pid": 3651948, "timestamp": 1757006307, "episode_reward_mean": -44.24866296136565, "time_total_s": 11134.67031955719, "episodes_total": 8655, "episode_len_mean": 29.73}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 11168.482450246811, "info": {"sample_time_ms": 34032.64, "num_steps_trained": 357600, "grad_time_ms": 373.878, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 438.6772155761719, "policy_loss": -0.17128852009773254, "vf_explained_var": 0.04709920659661293, "entropy": 10.442963600158691, "cur_lr": 4.999999873689376e-05, "total_loss": 438.5312805175781, "kl": 0.016678836196660995}, "load_time_ms": 0.706, "num_steps_sampled": 357600, "update_time_ms": 2.507}, "training_iteration": 298, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.81213068962097, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 357600, "timesteps_total": 357600, "custom_metrics": {}, "iterations_since_restore": 298, "episodes_this_iter": 39, "episode_reward_min": -91.00477448244305, "date": "2025-09-04_19-19-01", "episode_reward_max": 4.000119808434576, "pid": 3651948, "timestamp": 1757006341, "episode_reward_mean": -42.03931444679164, "time_total_s": 11168.482450246811, "episodes_total": 8694, "episode_len_mean": 28.7}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 11201.78668999672, "info": {"sample_time_ms": 34004.342, "num_steps_trained": 358800, "grad_time_ms": 375.751, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 447.9620361328125, "policy_loss": -0.16031722724437714, "vf_explained_var": 0.02535586804151535, "entropy": 11.190040588378906, "cur_lr": 4.999999873689376e-05, "total_loss": 447.82476806640625, "kl": 0.015202601440250874}, "load_time_ms": 0.708, "num_steps_sampled": 358800, "update_time_ms": 2.519}, "training_iteration": 299, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.30423974990845, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 358800, "timesteps_total": 358800, "custom_metrics": {}, "iterations_since_restore": 299, "episodes_this_iter": 37, "episode_reward_min": -90.47411887573381, "date": "2025-09-04_19-19-34", "episode_reward_max": 4.000031670263265, "pid": 3651948, "timestamp": 1757006374, "episode_reward_mean": -45.819786978871925, "time_total_s": 11201.78668999672, "episodes_total": 8731, "episode_len_mean": 30.58}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 11236.350129127502, "info": {"sample_time_ms": 34066.232, "num_steps_trained": 360000, "grad_time_ms": 372.985, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 470.8668212890625, "policy_loss": -0.17791648209095, "vf_explained_var": 0.025730881839990616, "entropy": 10.827828407287598, "cur_lr": 4.999999873689376e-05, "total_loss": 470.7132568359375, "kl": 0.01607631705701351}, "load_time_ms": 0.684, "num_steps_sampled": 360000, "update_time_ms": 2.496}, "training_iteration": 300, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.56343913078308, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 360000, "timesteps_total": 360000, "custom_metrics": {}, "iterations_since_restore": 300, "episodes_this_iter": 42, "episode_reward_min": -89.7114173671742, "date": "2025-09-04_19-20-09", "episode_reward_max": 4.000438841326207, "pid": 3651948, "timestamp": 1757006409, "episode_reward_mean": -43.891822344584035, "time_total_s": 11236.350129127502, "episodes_total": 8773, "episode_len_mean": 29.8}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 11270.30946135521, "info": {"sample_time_ms": 33899.468, "num_steps_trained": 361200, "grad_time_ms": 370.843, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 465.0353088378906, "policy_loss": -0.17757363617420197, "vf_explained_var": 0.025346828624606133, "entropy": 10.962993621826172, "cur_lr": 4.999999873689376e-05, "total_loss": 464.88232421875, "kl": 0.016150841489434242}, "load_time_ms": 0.673, "num_steps_sampled": 361200, "update_time_ms": 2.498}, "training_iteration": 301, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.95933222770691, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 361200, "timesteps_total": 361200, "custom_metrics": {}, "iterations_since_restore": 301, "episodes_this_iter": 43, "episode_reward_min": -90.85085832516624, "date": "2025-09-04_19-20-43", "episode_reward_max": 4.000807212266899, "pid": 3651948, "timestamp": 1757006443, "episode_reward_mean": -41.096598915944796, "time_total_s": 11270.30946135521, "episodes_total": 8816, "episode_len_mean": 28.4}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 11304.162751197815, "info": {"sample_time_ms": 33720.232, "num_steps_trained": 362400, "grad_time_ms": 372.928, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 440.8446960449219, "policy_loss": -0.1764705777168274, "vf_explained_var": 0.03883038088679314, "entropy": 10.327861785888672, "cur_lr": 4.999999873689376e-05, "total_loss": 440.6927185058594, "kl": 0.016116444021463394}, "load_time_ms": 0.664, "num_steps_sampled": 362400, "update_time_ms": 2.526}, "training_iteration": 302, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.85328984260559, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 362400, "timesteps_total": 362400, "custom_metrics": {}, "iterations_since_restore": 302, "episodes_this_iter": 41, "episode_reward_min": -90.85085832516624, "date": "2025-09-04_19-21-17", "episode_reward_max": 6.000016819112087, "pid": 3651948, "timestamp": 1757006477, "episode_reward_mean": -40.31461772771617, "time_total_s": 11304.162751197815, "episodes_total": 8857, "episode_len_mean": 27.8}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 11337.90143108368, "info": {"sample_time_ms": 33691.236, "num_steps_trained": 363600, "grad_time_ms": 373.751, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 488.3429870605469, "policy_loss": -0.17381690442562103, "vf_explained_var": 0.00864805094897747, "entropy": 10.813539505004883, "cur_lr": 4.999999873689376e-05, "total_loss": 488.19244384765625, "kl": 0.015300876460969448}, "load_time_ms": 0.677, "num_steps_sampled": 363600, "update_time_ms": 2.546}, "training_iteration": 303, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.73867988586426, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 363600, "timesteps_total": 363600, "custom_metrics": {}, "iterations_since_restore": 303, "episodes_this_iter": 49, "episode_reward_min": -90.38188123848992, "date": "2025-09-04_19-21-50", "episode_reward_max": 8.00014073366246, "pid": 3651948, "timestamp": 1757006510, "episode_reward_mean": -39.27054027924591, "time_total_s": 11337.90143108368, "episodes_total": 8906, "episode_len_mean": 27.24}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 11372.542692661285, "info": {"sample_time_ms": 33733.143, "num_steps_trained": 364800, "grad_time_ms": 372.899, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 452.9057312011719, "policy_loss": -0.17092859745025635, "vf_explained_var": 0.041277069598436356, "entropy": 10.396652221679688, "cur_lr": 4.999999873689376e-05, "total_loss": 452.75872802734375, "kl": 0.015768442302942276}, "load_time_ms": 0.679, "num_steps_sampled": 364800, "update_time_ms": 2.516}, "training_iteration": 304, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.6412615776062, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 364800, "timesteps_total": 364800, "custom_metrics": {}, "iterations_since_restore": 304, "episodes_this_iter": 45, "episode_reward_min": -90.7861691416485, "date": "2025-09-04_19-22-25", "episode_reward_max": 8.00014073366246, "pid": 3651948, "timestamp": 1757006545, "episode_reward_mean": -36.5787179220657, "time_total_s": 11372.542692661285, "episodes_total": 8951, "episode_len_mean": 25.85}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 11406.358407497406, "info": {"sample_time_ms": 33733.764, "num_steps_trained": 366000, "grad_time_ms": 371.583, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 446.9495849609375, "policy_loss": -0.17110978066921234, "vf_explained_var": 0.03779573738574982, "entropy": 10.373178482055664, "cur_lr": 4.999999873689376e-05, "total_loss": 446.8055419921875, "kl": 0.0178191140294075}, "load_time_ms": 0.676, "num_steps_sampled": 366000, "update_time_ms": 2.511}, "training_iteration": 305, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.815714836120605, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 366000, "timesteps_total": 366000, "custom_metrics": {}, "iterations_since_restore": 305, "episodes_this_iter": 39, "episode_reward_min": -92.60063372662192, "date": "2025-09-04_19-22-59", "episode_reward_max": 6.000007229369329, "pid": 3651948, "timestamp": 1757006579, "episode_reward_mean": -39.96323283805395, "time_total_s": 11406.358407497406, "episodes_total": 8990, "episode_len_mean": 27.7}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 11439.82025885582, "info": {"sample_time_ms": 33585.514, "num_steps_trained": 367200, "grad_time_ms": 372.256, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 436.24420166015625, "policy_loss": -0.17119070887565613, "vf_explained_var": 0.0166848823428154, "entropy": 10.841540336608887, "cur_lr": 4.999999873689376e-05, "total_loss": 436.0961608886719, "kl": 0.015268008224666119}, "load_time_ms": 0.686, "num_steps_sampled": 367200, "update_time_ms": 2.526}, "training_iteration": 306, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.461851358413696, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 367200, "timesteps_total": 367200, "custom_metrics": {}, "iterations_since_restore": 306, "episodes_this_iter": 41, "episode_reward_min": -92.60063372662192, "date": "2025-09-04_19-23-32", "episode_reward_max": 6.000004586562605, "pid": 3651948, "timestamp": 1757006612, "episode_reward_mean": -40.937305473336274, "time_total_s": 11439.82025885582, "episodes_total": 9031, "episode_len_mean": 28.35}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 11474.576438903809, "info": {"sample_time_ms": 33611.464, "num_steps_trained": 368400, "grad_time_ms": 371.401, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 468.3873291015625, "policy_loss": -0.16533058881759644, "vf_explained_var": 0.018551025539636612, "entropy": 10.50613021850586, "cur_lr": 4.999999873689376e-05, "total_loss": 468.24755859375, "kl": 0.016813894733786583}, "load_time_ms": 0.684, "num_steps_sampled": 368400, "update_time_ms": 2.529}, "training_iteration": 307, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.75618004798889, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 368400, "timesteps_total": 368400, "custom_metrics": {}, "iterations_since_restore": 307, "episodes_this_iter": 48, "episode_reward_min": -92.60063372662192, "date": "2025-09-04_19-24-07", "episode_reward_max": 8.000000400007286, "pid": 3651948, "timestamp": 1757006647, "episode_reward_mean": -41.111321091324605, "time_total_s": 11474.576438903809, "episodes_total": 9079, "episode_len_mean": 28.19}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 11509.679752349854, "info": {"sample_time_ms": 33741.183, "num_steps_trained": 369600, "grad_time_ms": 370.742, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 470.12060546875, "policy_loss": -0.18637312948703766, "vf_explained_var": 0.030650615692138672, "entropy": 10.451064109802246, "cur_lr": 4.999999873689376e-05, "total_loss": 469.9599609375, "kl": 0.016943683847784996}, "load_time_ms": 0.695, "num_steps_sampled": 369600, "update_time_ms": 2.561}, "training_iteration": 308, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 35.10331344604492, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 369600, "timesteps_total": 369600, "custom_metrics": {}, "iterations_since_restore": 308, "episodes_this_iter": 51, "episode_reward_min": -91.00169171281546, "date": "2025-09-04_19-24-42", "episode_reward_max": 8.000000400007286, "pid": 3651948, "timestamp": 1757006682, "episode_reward_mean": -34.03625109420629, "time_total_s": 11509.679752349854, "episodes_total": 9130, "episode_len_mean": 24.58}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 11543.63292002678, "info": {"sample_time_ms": 33807.24, "num_steps_trained": 370800, "grad_time_ms": 369.547, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 442.1883239746094, "policy_loss": -0.1690937876701355, "vf_explained_var": 0.02821219712495804, "entropy": 10.33169174194336, "cur_lr": 4.999999873689376e-05, "total_loss": 442.0450744628906, "kl": 0.017036719247698784}, "load_time_ms": 0.696, "num_steps_sampled": 370800, "update_time_ms": 2.551}, "training_iteration": 309, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 33.95316767692566, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 370800, "timesteps_total": 370800, "custom_metrics": {}, "iterations_since_restore": 309, "episodes_this_iter": 47, "episode_reward_min": -90.53602674662793, "date": "2025-09-04_19-25-16", "episode_reward_max": 8.000000798729044, "pid": 3651948, "timestamp": 1757006716, "episode_reward_mean": -34.02483593865472, "time_total_s": 11543.63292002678, "episodes_total": 9177, "episode_len_mean": 24.64}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 11579.952924489975, "info": {"sample_time_ms": 33982.89, "num_steps_trained": 372000, "grad_time_ms": 369.496, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 454.09228515625, "policy_loss": -0.1684252768754959, "vf_explained_var": 0.023490898311138153, "entropy": 10.880743980407715, "cur_lr": 4.999999873689376e-05, "total_loss": 453.9481506347656, "kl": 0.01598125509917736}, "load_time_ms": 0.699, "num_steps_sampled": 372000, "update_time_ms": 2.593}, "training_iteration": 310, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 36.3200044631958, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 372000, "timesteps_total": 372000, "custom_metrics": {}, "iterations_since_restore": 310, "episodes_this_iter": 39, "episode_reward_min": -90.92068676932722, "date": "2025-09-04_19-25-52", "episode_reward_max": 8.000000862998787, "pid": 3651948, "timestamp": 1757006752, "episode_reward_mean": -39.42615856737131, "time_total_s": 11579.952924489975, "episodes_total": 9216, "episode_len_mean": 27.46}
+{"hostname": "cda-server-2", "done": false, "time_since_restore": 11614.5580804348, "info": {"sample_time_ms": 34047.864, "num_steps_trained": 373200, "grad_time_ms": 369.15, "default": {"cur_kl_coeff": 1.5187499523162842, "vf_loss": 457.7786560058594, "policy_loss": -0.1775304675102234, "vf_explained_var": 0.0441647432744503, "entropy": 10.423691749572754, "cur_lr": 4.999999873689376e-05, "total_loss": 457.62640380859375, "kl": 0.016661131754517555}, "load_time_ms": 0.698, "num_steps_sampled": 373200, "update_time_ms": 2.534}, "training_iteration": 311, "config": {"clip_actions": true, "vf_share_layers": false, "entropy_coeff": 0.0, "clip_rewards": null, "clip_param": 0.3, "num_envs_per_worker": 1, "vf_loss_coeff": 1.0, "monitor": false, "observation_filter": "MeanStdFilter", "custom_resources_per_worker": {}, "sample_async": false, "optimizer": {}, "vf_clip_param": 10.0, "tf_session_args": {"device_count": {"CPU": 1}, "intra_op_parallelism_threads": 2, "log_device_placement": false, "allow_soft_placement": true, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}}, "collect_metrics_timeout": 180, "synchronize_filters": true, "sgd_minibatch_size": 128, "num_gpus": 0, "num_gpus_per_worker": 0, "batch_mode": "truncate_episodes", "compress_observations": false, "local_evaluator_tf_session_args": {"inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8}, "input": "sampler", "num_sgd_iter": 30, "simple_optimizer": false, "num_workers": 3, "multiagent": {"policies_to_train": null, "policy_graphs": {}, "policy_mapping_fn": null}, "input_evaluation": null, "preprocessor_pref": "deepmind", "callbacks": {"on_sample_end": null, "on_train_result": null, "on_episode_step": null, "on_episode_start": null, "on_episode_end": null}, "straggler_mitigation": false, "lr": 5e-05, "grad_clip": null, "output_compress_columns": ["obs", "new_obs"], "kl_target": 0.01, "output": null, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "env": 
"Zhenxin_S_FC", "lr_schedule": null, "log_level": "INFO", "postprocess_inputs": false, "use_gae": true, "model": {"use_lstm": false, "dim": 84, "max_seq_len": 20, "fcnet_hiddens": [128, 128, 128], "zero_mean": true, "custom_preprocessor": null, "grayscale": false, "squash_to_range": false, "lstm_cell_size": 256, "conv_activation": "relu", "conv_filters": null, "lstm_use_prev_action_reward": false, "free_log_std": false, "framestack": true, "custom_model": null, "custom_options": {}, "fcnet_activation": "tanh"}, "env_config": {"generalize": false, "run_valid": false}, "horizon": 50, "gamma": 0.99, "output_max_file_size": 67108864, "kl_coeff": 0.2, "sample_batch_size": 200, "lambda": 1.0, "train_batch_size": 1200}, "time_this_iter_s": 34.60515594482422, "num_metric_batches_dropped": 0, "policy_reward_mean": {}, "experiment_id": "881ce36181fe42dabe29289bda5f7577", "node_ip": "10.157.146.2", "timesteps_this_iter": 1200, "timesteps_since_restore": 373200, "timesteps_total": 373200, "custom_metrics": {}, "iterations